llvm-project/llvm/lib/Target/R600/AMDGPUInstrInfo.td

//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMDGPU DAG Profiles
//===----------------------------------------------------------------------===//

// Type profile for integer ternary operations: one result, three operands.
// Index 0 is the result and indices 1-3 are the operands.  The result and the
// first two operands must share the same integer type; the third operand only
// has to be an integer.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3,
  [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>]
>;

// Type profile used by the TRIG_PREOP node: one floating-point result and two
// operands.  The first operand has the same type as the result; the second
// operand is an integer.
def AMDGPUTrigPreOp : SDTypeProfile<1, 2, [
  SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// Type profile used by the LDEXP node: one floating-point result and two
// operands.  The first operand has the same type as the result; the second
// operand is an integer.
def AMDGPULdExpOp : SDTypeProfile<1, 2, [
  SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// Type profile used by the FP_CLASS node: an integer result, a floating-point
// first operand and an integer second operand.
def AMDGPUFPClassOp : SDTypeProfile<1, 2, [
  SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>
]>;

// Type profile used by the DIV_SCALE node: two results and three operands.
// Result 0 is floating point and result 1 is an integer.  All three operands
// (indices 2-4) have the same type as result 0.
def AMDGPUDivScaleOp : SDTypeProfile<2, 3, [
  SDTCisFP<0>, SDTCisInt<1>,
  SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>
]>;

//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//===----------------------------------------------------------------------===//

// The argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;

// Hardware cosine and sine nodes (unary floating-point operations).
// NOTE(review): the change that introduced these nodes describes the
// underlying instructions as accepting only a limited input range, with range
// reduction performed during lowering -- confirm against the lowering code.
def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;

// Fractional part of a floating-point value.
// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

// Reciprocal.
// out = 1.0 / a
def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;

// Reciprocal square root.
// out = 1.0 / sqrt(a)
def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;

// Legacy reciprocal square root.
// out = 1.0 / sqrt(a)
// NOTE(review): the formula given here is the same as for AMDGPUrsq; how the
// "legacy" variant differs is not stated in this file -- confirm against the
// hardware documentation.
def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;

// Clamped reciprocal square root.
// out = 1.0 / sqrt(a), with the result clamped to +/- max_float.
def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;

// LDEXP node.  Per AMDGPULdExpOp it has a floating-point result, a first
// operand of the same type and an integer second operand.
// NOTE(review): presumably out = a * 2^b, as for C ldexp() -- confirm.
def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;

// FP_CLASS node.  Per AMDGPUFPClassOp it has an integer result, a
// floating-point first operand and an integer second operand.
// NOTE(review): the integer operand is presumably a mask selecting the
// floating-point classes to test for -- confirm.
def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;

// Legacy floating-point maximum: out = max(a, b), where a and b are floats and
// a comparison involving a nan fails.
// No node properties are set.  In particular the node is not commutative,
// because a failed comparison yields the second operand:
//   x < nan ? x : nan -> nan
//   nan < x ? nan : x -> x
def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp>;

// Three-operand floating-point nodes.  Neither sets any node properties.
// NOTE(review): presumably CLAMP clamps its first operand to the range given
// by the other two, and MAD is a multiply-add (a * b + c) -- confirm.
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp>;
def AMDGPUmad   : SDNode<"AMDGPUISD::MAD", SDTFPTernaryOp>;

// Signed integer maximum: out = max(a, b).
// Marked commutative and associative.
def AMDGPUsmax : SDNode<
  "AMDGPUISD::SMAX", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;

// Unsigned integer maximum: out = max(a, b).
// Marked commutative and associative.
def AMDGPUumax : SDNode<
  "AMDGPUISD::UMAX", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;

// Legacy floating-point minimum: out = min(a, b), where a and b are floats and
// a comparison involving a nan fails.
// No node properties are set, so the node is treated as neither commutative
// nor associative.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp>;

// Signed integer minimum: out = min(a, b).
// Marked commutative and associative.
def AMDGPUsmin : SDNode<
  "AMDGPUISD::SMIN", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;

// Unsigned integer minimum: out = min(a, b).
// Marked commutative and associative.
def AMDGPUumin : SDNode<
  "AMDGPUISD::UMIN", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]
>;

// FIXME: TableGen doesn't like commutative instructions with more
// than 2 operands.  Until that is resolved, the property lists of the
// three-operand min/max nodes are left empty and the intended properties are
// kept only as a comment inside the list.
// out = max(a, b, c) a, b and c are floats
def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// Three-operand signed integer maximum.
// out = max(a, b, c) a, b and c are signed ints
// The property list is intentionally empty; the properties are commented out.
def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// Three-operand unsigned integer maximum.
// out = max(a, b, c) a, b and c are unsigned ints
// The property list is intentionally empty; the properties are commented out.
def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// Three-operand floating-point minimum.
// out = min(a, b, c) a, b and c are floats
// The property list is intentionally empty; the properties are commented out.
def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// Three-operand signed integer minimum.
// out = min(a, b, c) a, b and c are signed ints
// The property list is intentionally empty; the properties are commented out.
def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// Three-operand unsigned integer minimum.
// out = min(a, b, c) a, b and c are unsigned ints
// The property list is intentionally empty; the properties are commented out.
def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// Integer-to-floating-point conversion nodes, one per byte index (0-3).
// None of them sets any node properties.
// NOTE(review): presumably node N converts unsigned byte N of the integer
// operand to f32, as the names suggest -- confirm.
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0", SDTIntToFPOp>;
def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1", SDTIntToFPOp>;
def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2", SDTIntToFPOp>;
def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3", SDTIntToFPOp>;


// urecip - This operation is a helper for integer division.  It returns the
// result of 1 / a as a fractional unsigned integer:
//   out = (2^32 / a) + e
// where e is the rounding error.
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;

// Special case divide preop and flags.
// Per AMDGPUDivScaleOp the node has two results (a floating-point value and an
// integer) and three operands of the same floating-point type.
// NOTE(review): the integer result is presumably the "flags" output -- confirm.
def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;

//  Special case divide FMA with scale and flags (src0 = Quotient,
//  src1 = Denominator, src2 = Numerator).
def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;

// Single or double precision division fixup.
// Special case divide fixup and flags (src0 = Quotient, src1 =
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;

// Look up 2.0 / pi for src0, with the segment selected by src1[4:0].
// Per AMDGPUTrigPreOp, src0 and the result are floating point and src1 is an
// integer.
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;

// Register load node: one result and two operands.  Index 0 is the result, so
// the first operand (index 1) must be a pointer and the second operand
// (index 2) an integer.  The node has a chain and may load.
// NOTE(review): the commit that added this node describes it as part of
// indirect addressing support; the meaning of the integer operand is not
// stated in this file -- confirm against the lowering code.
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
                          SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
                          [SDNPHasChain, SDNPMayLoad]>;

// Register store node: no results and three operands.  With no results,
// constraint indices start at the first operand, so the operand at index 1
// must be a pointer and the operand at index 2 an integer; the operand at
// index 0 is left unconstrained.  The node has a chain and may store.
// NOTE(review): operand 0 is presumably the value being stored -- confirm.
def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
                           SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
                           [SDNPHasChain, SDNPMayStore]>;

// MSKOR instructions are atomic memory instructions used mainly for storing
// 8-bit and 16-bit values.  The definition is:
//
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// src0: vec4(src, 0, 0, mask)
// src1: dst - rat offset (aka pointer) in dwords
//
// The type profile places no constraints on the two operands.  The node has a
// chain, may store, and carries a memory operand.
def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
                        SDTypeProfile<0, 2, []>,
                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

// Target-side name for the generic ISD::FROUND node, so that it can be used in
// selection patterns.  The type profile is the standard unary floating-point
// one (floating-point result, operand of the same type), so the shared
// SDTFPUnaryOp profile is used here, as for the other unary FP nodes in this
// file, instead of restating the same constraints inline.
def AMDGPUround : SDNode<"ISD::FROUND", SDTFPUnaryOp>;

// Bitfield nodes.  BFE_U32, BFE_I32 and BFI use the integer ternary profile;
// BFM is an integer binary operation.
// NOTE(review): presumably BFE = bitfield extract (unsigned / signed),
// BFI = bitfield insert and BFM = bitfield mask -- confirm operand order and
// exact semantics against the hardware documentation.
def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;

// Integer unary node for the brev instruction.
// NOTE(review): presumably reverses the bit order of its operand -- confirm.
def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>;

// Signed and unsigned 24-bit multiply.  The highest 8 bits are ignored when
// performing the multiply.  The result is a 32-bit value.
// Both nodes are marked commutative.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
  [SDNPCommutative]
>;
def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
  [SDNPCommutative]
>;

// Unsigned and signed 24-bit MAD nodes.  Both use the integer ternary profile
// and set no node properties.
// NOTE(review): presumably out = a * b + c using the 24-bit multiply described
// for MUL_U24 / MUL_I24 -- confirm.
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp>;
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp>;

//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
// Type profile for the conditional branch node: no results and two operands.
// The operand at index 0 must have type OtherVT; the operand at index 1 is
// left unconstrained.
// NOTE(review): the previous comment here described the "second and third"
// operands as basic blocks, which does not match this two-operand profile.
// Presumably operand 0 is the target basic block and operand 1 the
// condition -- confirm against the lowering code.
def SDTIL_BRCond : SDTypeProfile<0, 2, [
    SDTCisVT<0, OtherVT>
    ]>;

//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//
// Conditional branch node.  Uses the SDTIL_BRCond profile and has a chain.
def IL_brcond      : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
// Return node.  Its profile (SDTNone) declares no results and no operands; the
// node has a chain and optionally takes an incoming glue operand.
def IL_retflag       : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
    [SDNPHasChain, SDNPOptInGlue]>;
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//`
			`//`
			`// The LLVM Compiler Infrastructure`
			`//`
			`// This file is distributed under the University of Illinois Open Source`
			`// License. See LICENSE.TXT for details.`
			`//`
			`//===----------------------------------------------------------------------===//`
			`//`
			`// This file contains DAG node definitions for the AMDGPU target.`
			`//`
			`//===----------------------------------------------------------------------===//`

			`//===----------------------------------------------------------------------===//`
			`// AMDGPU DAG Profiles`
			`//===----------------------------------------------------------------------===//`

			`def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [`
			`SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>`
			`]>;`

R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00			`def AMDGPUTrigPreOp : SDTypeProfile<1, 2,`
			`[SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]`
			`>;`

R600/SI: Add intrinsic for ldexp llvm-svn: 215734 2014-08-16 01:30:25 +08:00			`def AMDGPULdExpOp : SDTypeProfile<1, 2,`
			`[SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]`
			`>;`

R600/SI: Add class intrinsic llvm-svn: 225305 2015-01-07 07:00:37 +08:00			`def AMDGPUFPClassOp : SDTypeProfile<1, 2,`
			`[SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]`
			`>;`

R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00			`def AMDGPUDivScaleOp : SDTypeProfile<2, 3,`
			`[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]`
			`>;`

Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`//===----------------------------------------------------------------------===//`
			`// AMDGPU DAG Nodes`
			`//`

			`// This argument to this node is a dword address.`
			`def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;`

R600/SI: implement range reduction for sin/cos These instructions can only take a limited input range, and return the constant value 1 out of range. We should do range reduction to be able to process arbitrary values. Use a FRACT instruction after normalization to achieve this. Also add a test for constant folding with the lowered code with unsafe-fp-math enabled. v2: use DAG lowering instead of intrinsic, adapt test v3: calculate constant, fold pattern into instruction definition v4: misc style fixes, add sin-fold testcase, cosmetics Patch by Grigori Goronzy llvm-svn: 213458 2014-07-20 02:44:39 +08:00			`def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;`
			`def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;`

Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`// out = a - floor(a)`
			`def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;`

R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00			`// out = 1.0 / a`
			`def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;`

			`// out = 1.0 / sqrt(a)`
			`def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;`

R600: Fix inconsistency in rsq instructions. R600 was using a clamped version of rsq, but SI was not. Add a new rsq_clamped intrinsic and use them consistently. It's unclear to me from the documentation what behavior the R600 instructions have, so I assume they have the legacy behavior described by the SI documents. For R600, use RECIPSQRT_IEEE for both llvm.AMDGPU.rsq.legacy and llvm.AMDGPU.rsq. R600 also has RECIPSQRT_FF, which I'm not sure how it fits in here. llvm-svn: 211637 2014-06-25 06:13:39 +08:00			`// out = 1.0 / sqrt(a)`
			`def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;`

			`// out = 1.0 / sqrt(a) result clamped to +/- max_float.`
			`def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;`

R600/SI: Add intrinsic for ldexp llvm-svn: 215734 2014-08-16 01:30:25 +08:00			`def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;`

R600/SI: Add class intrinsic llvm-svn: 225305 2015-01-07 07:00:37 +08:00			`def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;`

R600/SI: Fix fmin_legacy / fmax_legacy matching for SI select_cc is expanded on SI, so this was never matched. llvm-svn: 221941 2014-11-14 07:03:09 +08:00			`// out = max(a, b) a and b are floats, where a nan comparison fails.`
			`// This is not commutative because this gives the second operand:`
			`// x < nan ? x : nan -> nan`
			`// nan < x ? nan : x -> x`
			`def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,`
R600/SI: fmin/fmax_legacy are not associative llvm-svn: 224093 2014-12-12 10:30:33 +08:00			`[]`
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`>;`

R600: Mostly remove remaining AMDIL intrinsics. Delete all unused ones, and add new AMDGPU named intrinsics for the ones that are. Handle the old AMDIL names for comptability (although remove their GCCBuiltin names) and add tests since there weren't any for these before. llvm-svn: 210827 2014-06-13 05:15:44 +08:00			`def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;`
R600/SI: Use mad for fsub + fmul We can use a negate source modifier to match this for fsub. llvm-svn: 216735 2014-08-30 00:01:14 +08:00			`def AMDGPUmad : SDNode<"AMDGPUISD::MAD", SDTFPTernaryOp, []>;`
R600: Mostly remove remaining AMDIL intrinsics. Delete all unused ones, and add new AMDGPU named intrinsics for the ones that are. Handle the old AMDIL names for comptability (although remove their GCCBuiltin names) and add tests since there weren't any for these before. llvm-svn: 210827 2014-06-13 05:15:44 +08:00
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`// out = max(a, b) a and b are signed ints`
			`def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,`
			`[SDNPCommutative, SDNPAssociative]`
			`>;`

			`// out = max(a, b) a and b are unsigned ints`
			`def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,`
			`[SDNPCommutative, SDNPAssociative]`
			`>;`

R600/SI: Fix fmin_legacy / fmax_legacy matching for SI select_cc is expanded on SI, so this was never matched. llvm-svn: 221941 2014-11-14 07:03:09 +08:00			`// out = min(a, b) a and b are floats, where a nan comparison fails.`
			`def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,`
R600/SI: fmin/fmax_legacy are not associative llvm-svn: 224093 2014-12-12 10:30:33 +08:00			`[]`
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`>;`

R600/SI: Combine min3/max3 instructions llvm-svn: 222032 2014-11-15 04:08:52 +08:00			`// out = min(a, b) a and b are signed ints`
Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,`
			`[SDNPCommutative, SDNPAssociative]`
			`>;`

			`// out = min(a, b) a and b are unsigned ints`
			`def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,`
			`[SDNPCommutative, SDNPAssociative]`
			`>;`

R600/SI: Combine min3/max3 instructions llvm-svn: 222032 2014-11-15 04:08:52 +08:00			`// FIXME: TableGen doesn't like commutative instructions with more`
			`// than 2 operands.`
			`// out = max(a, b, c) a, b and c are floats`
			`def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,`
			`[/*SDNPCommutative, SDNPAssociative*/]`
			`>;`

			`// out = max(a, b, c) a, b, and c are signed ints`
			`def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,`
			`[/*SDNPCommutative, SDNPAssociative*/]`
			`>;`

			`// out = max(a, b, c) a, b and c are unsigned ints`
			`def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,`
			`[/*SDNPCommutative, SDNPAssociative*/]`
			`>;`

			`// out = min(a, b, c) a, b and c are floats`
			`def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,`
			`[/*SDNPCommutative, SDNPAssociative*/]`
			`>;`

			`// out = min(a, b, c) a, b and c are signed ints`
			`def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,`
			`[/*SDNPCommutative, SDNPAssociative*/]`
			`>;`

			`// out = min(a, b, c) a, b and c are unsigned ints`
			`def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,`
			`[/*SDNPCommutative, SDNPAssociative*/]`
			`>;`
R600/SI: Use v_cvt_f32_ubyte* instructions This eliminates extra extract instructions when loading an i8 vector to a float vector. llvm-svn: 210666 2014-06-12 01:50:44 +08:00
			`def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",`
			`SDTIntToFPOp, []>;`
			`def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",`
			`SDTIntToFPOp, []>;`
			`def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2",`
			`SDTIntToFPOp, []>;`
			`def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",`
			`SDTIntToFPOp, []>;`


Add R600 backend A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX llvm-svn: 169915 2012-12-12 05:25:42 +08:00			`// urecip - This operation is a helper for integer division, it returns the`
			`// result of 1 / a as a fractional unsigned integer.`
			`// out = (2^32 / a) + e`
			`// e is rounding error`
			`def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;`

R600/SI: Add intrinsics for various math instructions. These will be used for custom lowering and for library implementations of various math functions, so it's useful to expose these as builtins. llvm-svn: 211247 2014-06-19 09:19:19 +08:00			`// Special case divide preop and flags.`
			`def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;`

			`// Special case divide FMA with scale and flags (src0 = Quotient,`
			`// src1 = Denominator, src2 = Numerator).`
			`def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;`

			`// Single or double precision division fixup.`
			`// Special case divide fixup and flags(src0 = Quotient, src1 =`
			`// Denominator, src2 = Numerator).`
			`def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;`

			`// Look Up 2.0 / pi src0 with segment select src1[4:0]`
			`def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;`

R600: Support for indirect addressing v4 Only implemented for R600 so far. SI is missing implementations of a few callbacks used by the Indirect Addressing pass and needs code to handle frame indices. At the moment R600 only supports array sizes of 16 dwords or less. Register packing of vector types is currently disabled, which means that a vec4 is stored in T0_X, T1_X, T2_X, T3_X, rather than T0_XYZW. In order to correctly pack registers in all cases, we will need to implement an analysis pass for R600 that determines the correct vector width for each array. v2: - Add support for i8 zext load from stack. - Coding style fixes v3: - Don't reserve registers for indirect addressing when it isn't being used. - Fix bug caused by LLVM limiting the number of SubRegIndex declarations. v4: - Fix 64-bit defines llvm-svn: 174525 2013-02-07 01:32:29 +08:00			`def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",`
			`SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,`
			`[SDNPHasChain, SDNPMayLoad]>;`

			`def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",`
			`SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,`
			`[SDNPHasChain, SDNPMayStore]>;`
R600: Add support for i16 and i8 global stores Tested-by: Aaron Watry <awatry@gmail.com> llvm-svn: 188519 2013-08-16 09:12:06 +08:00
R600: Add support for i8 and i16 local memory stores llvm-svn: 189223 2013-08-26 23:05:49 +08:00			`// MSKOR instructions are atomic memory instructions used mainly for storing`
			`// 8-bit and 16-bit values. The definition is:`
			`//`
			`// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)`
			`//`
			`// src0: vec4(src, 0, 0, mask)`
R600/SI: Fix fmin_legacy / fmax_legacy matching for SI select_cc is expanded on SI, so this was never matched. llvm-svn: 221941 2014-11-14 07:03:09 +08:00			`// src1: dst - rat offset (aka pointer) in dwords`
R600: Add support for i16 and i8 global stores Tested-by: Aaron Watry <awatry@gmail.com> llvm-svn: 188519 2013-08-16 09:12:06 +08:00			`def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",`
			`SDTypeProfile<0, 2, []>,`
			`[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;`
R600: Add support for ISD::FROUND NOTE: This is a candidate for the 3.4 branch. llvm-svn: 195878 2013-11-28 05:23:20 +08:00
			`def AMDGPUround : SDNode<"ISD::FROUND",`
			`SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;`
R600: Match sign_extend_inreg to BFE instructions llvm-svn: 204072 2014-03-18 02:58:11 +08:00
			`def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;`
			`def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;`
R600: Add target nodes for BFM and BFI llvm-svn: 205235 2014-04-01 02:21:13 +08:00			`def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;`
			`def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;`
R600: Match sign_extend_inreg to BFE instructions llvm-svn: 204072 2014-03-18 02:58:11 +08:00
R600/SI: Add intrinsics for brev instructions llvm-svn: 211187 2014-06-19 01:13:57 +08:00			`def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>;`

R600: Match 24-bit arithmetic patterns in a Target DAGCombine Moving these patterns from TableGen files to PerformDAGCombine() should allow us to generate better code by eliminating unnecessary shifts and extensions earlier. This also fixes a bug where the MAD pattern was calling SimplifyDemandedBits with a 24-bit mask on the first operand even when the full pattern wasn't being matched. This occasionally resulted in some instructions being incorrectly deleted from the program. v2: - Fix bug with 64-bit mul llvm-svn: 205731 2014-04-08 03:45:41 +08:00			`// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when`
			`// performing the multiply. The result is a 32-bit value.`
			`def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,`
			`[SDNPCommutative]`
			`>;`
			`def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,`
			`[SDNPCommutative]`
			`>;`
R600: Add intrinsics for mad24 llvm-svn: 209456 2014-05-23 02:00:15 +08:00
			`def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,`
			`[]`
			`>;`
			`def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,`
			`[]`
			`>;`
R600: Remove AMDIL instruction and register definitions Most of these are no longer used any more. llvm-svn: 210915 2014-06-14 00:38:59 +08:00
			`//===----------------------------------------------------------------------===//`
			`// Flow Control Profile Types`
			`//===----------------------------------------------------------------------===//`
			`// Branch instruction where second and third are basic blocks`
			`def SDTIL_BRCond : SDTypeProfile<0, 2, [`
			`SDTCisVT<0, OtherVT>`
			`]>;`

			`//===----------------------------------------------------------------------===//`
			`// Flow Control DAG Nodes`
			`//===----------------------------------------------------------------------===//`
			`def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;`

			`//===----------------------------------------------------------------------===//`
			`// Call/Return DAG Nodes`
			`//===----------------------------------------------------------------------===//`
			`def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,`
			`[SDNPHasChain, SDNPOptInGlue]>;`