NVPTX: Remove the legacy ptx intrinsics

- Rename the ptx.read.* intrinsics to nvvm.read.ptx.sreg.* - some but
  not all of these registers were already accessible via the nvvm
  name.
- Rename ptx.bar.sync to nvvm.bar.sync, to match nvvm.bar0.

There's a fair amount of code motion here, but it's all very
mechanical.

llvm-svn: 274769
This commit is contained in:
Justin Bogner 2016-07-07 16:40:17 +00:00
parent 3972953efd
commit a466cc33fa
7 changed files with 209 additions and 320 deletions

View File

@ -738,6 +738,10 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
// Barrier intrinsic replacing the legacy int_ptx_bar_sync. It produces no
// result, takes a single i32 operand, and is marked IntrConvergent. It is
// exposed to front ends under the builtin name __nvvm_bar_sync.
// NOTE(review): the i32 operand is presumably the barrier number - confirm
// against the PTX bar.sync documentation.
def int_nvvm_bar_sync :
Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
GCCBuiltin<"__nvvm_bar_sync">;
// Membar
def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
Intrinsic<[], [], []>;
@ -746,53 +750,6 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
Intrinsic<[], [], []>;
// Accessing special registers
def int_nvvm_read_ptx_sreg_tid_x :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
def int_nvvm_read_ptx_sreg_tid_y :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
def int_nvvm_read_ptx_sreg_tid_z :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
def int_nvvm_read_ptx_sreg_ntid_x :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
def int_nvvm_read_ptx_sreg_ntid_y :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
def int_nvvm_read_ptx_sreg_ntid_z :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
def int_nvvm_read_ptx_sreg_ctaid_x :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
def int_nvvm_read_ptx_sreg_ctaid_y :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
def int_nvvm_read_ptx_sreg_ctaid_z :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
def int_nvvm_read_ptx_sreg_nctaid_x :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
def int_nvvm_read_ptx_sreg_nctaid_y :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
def int_nvvm_read_ptx_sreg_nctaid_z :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
def int_nvvm_read_ptx_sreg_warpsize :
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
// Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the
// pointer's alignment.
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
@ -3666,9 +3623,8 @@ def int_nvvm_swap_lo_hi_b64
GCCBuiltin<"__nvvm_swap_lo_hi_b64">;
// Old PTX back-end intrinsics retained here for backwards-compatibility
multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
// Accessing special registers.
multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
// FIXME: Do we need the 128-bit integer type version?
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
@ -3676,74 +3632,57 @@ multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_x")>;
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_y")>;
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_z")>;
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_w")>;
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
}
class PTXReadSpecialRegisterIntrinsic_r32<string name>
class PTXReadSRegIntrinsic_r32<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
class PTXReadSpecialRegisterIntrinsic_r64<string name>
class PTXReadSRegIntrinsic_r64<string name>
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_tid">;
defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_ntid">;
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_laneid">;
def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_warpid">;
def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_nwarpid">;
def int_nvvm_read_ptx_sreg_laneid : PTXReadSRegIntrinsic_r32<"laneid">;
def int_nvvm_read_ptx_sreg_warpid : PTXReadSRegIntrinsic_r32<"warpid">;
def int_nvvm_read_ptx_sreg_nwarpid : PTXReadSRegIntrinsic_r32<"nwarpid">;
defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_ctaid">;
defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_nctaid">;
defm int_nvvm_read_ptx_sreg_ctaid : PTXReadSRegIntrinsic_v4i32<"ctaid">;
defm int_nvvm_read_ptx_sreg_nctaid : PTXReadSRegIntrinsic_v4i32<"nctaid">;
def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_smid">;
def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_nsmid">;
def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_gridid">;
def int_nvvm_read_ptx_sreg_smid : PTXReadSRegIntrinsic_r32<"smid">;
def int_nvvm_read_ptx_sreg_nsmid : PTXReadSRegIntrinsic_r32<"nsmid">;
def int_nvvm_read_ptx_sreg_gridid : PTXReadSRegIntrinsic_r32<"gridid">;
def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_eq">;
def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_le">;
def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_lt">;
def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_ge">;
def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_gt">;
def int_nvvm_read_ptx_sreg_lanemask_eq :
PTXReadSRegIntrinsic_r32<"lanemask_eq">;
def int_nvvm_read_ptx_sreg_lanemask_le :
PTXReadSRegIntrinsic_r32<"lanemask_le">;
def int_nvvm_read_ptx_sreg_lanemask_lt :
PTXReadSRegIntrinsic_r32<"lanemask_lt">;
def int_nvvm_read_ptx_sreg_lanemask_ge :
PTXReadSRegIntrinsic_r32<"lanemask_ge">;
def int_nvvm_read_ptx_sreg_lanemask_gt :
PTXReadSRegIntrinsic_r32<"lanemask_gt">;
def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_clock">;
def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
<"__builtin_ptx_read_clock64">;
def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm0">;
def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm1">;
def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm2">;
def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm3">;
def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
GCCBuiltin<"__builtin_ptx_bar_sync">;
def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
//
// SHUFFLE

View File

@ -62,6 +62,9 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("}}", ""))))))),
[(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
// Selects int_nvvm_bar_sync when its operand is an immediate and prints it as
// "bar.sync <imm>;". The instruction has no outputs; only the immediate form
// is matched by this pattern.
def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
[(int_nvvm_bar_sync imm:$i)]>;
// shfl.{up,down,bfly,idx}.b32
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
// The last two parameters to shfl can be regs or imms. ptxas is smart
@ -1375,44 +1378,6 @@ defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
//-----------------------------------
// Read Special Registers
//-----------------------------------
class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
NVPTXInst<(outs regclassOut:$dst), (ins),
OpStr,
[(set regclassOut:$dst, (IntOp))]>;
def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
int_nvvm_read_ptx_sreg_warpsize>;
//-----------------------------------
@ -7005,98 +6970,95 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
//-----------------------------------
// Read Special Registers
//-----------------------------------
//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
// These intrinsics are handled to retain compatibility with the old backend.
// PTX Special Purpose Register Accessor Intrinsics
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
: NVPTXInst<(outs Int64Regs:$d), (ins),
!strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
[(set Int64Regs:$d, (intop))]>;
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
: NVPTXInst<(outs Int32Regs:$d), (ins),
!strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
[(set Int32Regs:$d, (intop))]>;
// TODO Add read vector-version of special registers
def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
int_ptx_read_tid_x>;
def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
int_ptx_read_tid_y>;
def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
int_ptx_read_tid_z>;
def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
int_ptx_read_tid_w>;
def INT_PTX_SREG_TID_X :
PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y :
PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z :
PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W :
PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
int_ptx_read_ntid_x>;
def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
int_ptx_read_ntid_y>;
def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
int_ptx_read_ntid_z>;
def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
int_ptx_read_ntid_w>;
def INT_PTX_SREG_NTID_X :
PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y :
PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z :
PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W :
PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
int_ptx_read_laneid>;
def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
int_ptx_read_warpid>;
def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
int_ptx_read_nwarpid>;
def INT_PTX_SREG_LANEID :
PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID :
PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID :
PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
int_ptx_read_ctaid_x>;
def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
int_ptx_read_ctaid_y>;
def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
int_ptx_read_ctaid_z>;
def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
int_ptx_read_ctaid_w>;
def INT_PTX_SREG_CTAID_X :
PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y :
PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z :
PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W :
PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
int_ptx_read_nctaid_x>;
def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
int_ptx_read_nctaid_y>;
def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
int_ptx_read_nctaid_z>;
def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
int_ptx_read_nctaid_w>;
def INT_PTX_SREG_NCTAID_X :
PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y :
PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z :
PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W :
PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
int_ptx_read_smid>;
def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
int_ptx_read_nsmid>;
def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
int_ptx_read_gridid>;
def INT_PTX_SREG_SMID :
PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID :
PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID :
PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
def PTX_READ_LANEMASK_EQ
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
def PTX_READ_LANEMASK_LE
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
def PTX_READ_LANEMASK_LT
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
def PTX_READ_LANEMASK_GE
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
def PTX_READ_LANEMASK_GT
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
def INT_PTX_SREG_LANEMASK_EQ :
PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE :
PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT :
PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE :
PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT :
PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
def PTX_READ_CLOCK
: PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
def PTX_READ_CLOCK64
: PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
def INT_PTX_SREG_CLOCK :
PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64 :
PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
// PTX Parallel Synchronization and Communication Intrinsics
def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
[(int_ptx_bar_sync imm:$i)]>;
// Selects int_nvvm_read_ptx_sreg_warpsize by moving the PTX constant WARP_SZ
// into a 32-bit register.
// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
// handle the constant.
// (PTX_READ_SREG_R32 always emits "%" in front of the register name, whereas
// WARP_SZ is printed without that prefix, so the asm string is written out by
// hand here.)
def INT_PTX_SREG_WARPSIZE :
NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
[(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;

View File

@ -32,7 +32,7 @@ static bool readsThreadIndex(const IntrinsicInst *II) {
}
static bool readsLaneId(const IntrinsicInst *II) {
return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
}
// Whether the given intrinsic is an atomic instruction in PTX.

View File

@ -85,57 +85,45 @@ bool NVVMIntrRange::runOnFunction(Function &F) {
if (Function *Callee = Call->getCalledFunction()) {
switch (Callee->getIntrinsicID()) {
// Index within block
case Intrinsic::ptx_read_tid_x:
case Intrinsic::nvvm_read_ptx_sreg_tid_x:
Changed |= addRangeMetadata(0, MaxBlockSize.x, Call);
break;
case Intrinsic::ptx_read_tid_y:
case Intrinsic::nvvm_read_ptx_sreg_tid_y:
Changed |= addRangeMetadata(0, MaxBlockSize.y, Call);
break;
case Intrinsic::ptx_read_tid_z:
case Intrinsic::nvvm_read_ptx_sreg_tid_z:
Changed |= addRangeMetadata(0, MaxBlockSize.z, Call);
break;
// Block size
case Intrinsic::ptx_read_ntid_x:
case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
Changed |= addRangeMetadata(1, MaxBlockSize.x+1, Call);
break;
case Intrinsic::ptx_read_ntid_y:
case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
Changed |= addRangeMetadata(1, MaxBlockSize.y+1, Call);
break;
case Intrinsic::ptx_read_ntid_z:
case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
Changed |= addRangeMetadata(1, MaxBlockSize.z+1, Call);
break;
// Index within grid
case Intrinsic::ptx_read_ctaid_x:
case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
Changed |= addRangeMetadata(0, MaxGridSize.x, Call);
break;
case Intrinsic::ptx_read_ctaid_y:
case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
Changed |= addRangeMetadata(0, MaxGridSize.y, Call);
break;
case Intrinsic::ptx_read_ctaid_z:
case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
Changed |= addRangeMetadata(0, MaxGridSize.z, Call);
break;
// Grid size
case Intrinsic::ptx_read_nctaid_x:
case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
Changed |= addRangeMetadata(1, MaxGridSize.x+1, Call);
break;
case Intrinsic::ptx_read_nctaid_y:
case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
Changed |= addRangeMetadata(1, MaxGridSize.y+1, Call);
break;
case Intrinsic::ptx_read_nctaid_z:
case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
Changed |= addRangeMetadata(1, MaxGridSize.z+1, Call);
break;
@ -146,7 +134,7 @@ bool NVVMIntrRange::runOnFunction(Function &F) {
break;
// Lane ID is [0..warpsize)
case Intrinsic::ptx_read_laneid:
case Intrinsic::nvvm_read_ptx_sreg_laneid:
Changed |= addRangeMetadata(0, 32, Call);
break;

View File

@ -100,7 +100,7 @@ merge:
define i32 @loop() {
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop'
entry:
%laneid = call i32 @llvm.ptx.read.laneid()
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %i1, %loop ]
@ -208,7 +208,7 @@ bb3:
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.ptx.read.laneid()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5}
!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}

View File

@ -10,10 +10,10 @@ target triple = "nvptx64-nvidia-cuda"
define void @some_kernel(%class.float3* nocapture %dst) #0 {
_ZL11compute_vecRK6float3jb.exit:
%ret_vec.sroa.8.i = alloca float, align 4
%0 = tail call i32 @llvm.ptx.read.ctaid.x()
%1 = tail call i32 @llvm.ptx.read.ntid.x()
%0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
%1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
%2 = mul nsw i32 %1, %0
%3 = tail call i32 @llvm.ptx.read.tid.x()
%3 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%4 = add nsw i32 %2, %3
%5 = zext i32 %4 to i64
%6 = bitcast float* %ret_vec.sroa.8.i to i8*
@ -37,13 +37,13 @@ _ZL11compute_vecRK6float3jb.exit:
}
; Function Attrs: nounwind readnone
declare i32 @llvm.ptx.read.ctaid.x() #1
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.ptx.read.ntid.x() #1
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.ptx.read.tid.x() #1
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #2

View File

@ -8,71 +8,71 @@
define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
; RANGE: call i32 @llvm.ptx.read.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.x()
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
ret i32 %x
}
define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
; RANGE: call i32 @llvm.ptx.read.tid.y(), !range ![[BLK_IDX_XY]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.y()
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
ret i32 %x
}
define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
; RANGE: call i32 @llvm.ptx.read.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.z()
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
ret i32 %x
}
define ptx_device i32 @test_tid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.w()
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
ret i32 %x
}
define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
; RANGE: call i32 @llvm.ptx.read.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.x()
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
ret i32 %x
}
define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
; RANGE: call i32 @llvm.ptx.read.ntid.y(), !range ![[BLK_SIZE_XY]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.y()
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
ret i32 %x
}
define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
; RANGE: call i32 @llvm.ptx.read.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.z()
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
ret i32 %x
}
define ptx_device i32 @test_ntid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.w()
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
ret i32 %x
}
define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
; RANGE: call i32 @llvm.ptx.read.laneid(), !range ![[LANEID:[0-9]+]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.laneid()
%x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
ret i32 %x
}
@ -87,71 +87,71 @@ define ptx_device i32 @test_warpsize() {
define ptx_device i32 @test_warpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.warpid()
%x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
ret i32 %x
}
define ptx_device i32 @test_nwarpid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nwarpid()
%x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
ret i32 %x
}
define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
; RANGE: call i32 @llvm.ptx.read.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.y()
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
ret i32 %x
}
define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
; RANGE: call i32 @llvm.ptx.read.ctaid.z(), !range ![[GRID_IDX_YZ]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.z()
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
ret i32 %x
}
define ptx_device i32 @test_ctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
; RANGE_30: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
; RANGE_20: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_YZ]]
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.x()
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
ret i32 %x
}
define ptx_device i32 @test_ctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.w()
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
ret i32 %x
}
define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
; RANGE: call i32 @llvm.ptx.read.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.y()
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
ret i32 %x
}
define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
; RANGE: call i32 @llvm.ptx.read.nctaid.z(), !range ![[GRID_SIZE_YZ]]
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.z()
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
ret i32 %x
}
define ptx_device i32 @test_nctaid_x() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
; RANGE_30: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
; RANGE_20: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_YZ]]
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.x()
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
ret i32 %x
}
@ -159,157 +159,157 @@ define ptx_device i32 @test_nctaid_x() {
define ptx_device i32 @test_nctaid_w() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.w()
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
ret i32 %x
}
define ptx_device i32 @test_smid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.smid()
%x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
ret i32 %x
}
define ptx_device i32 @test_nsmid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nsmid()
%x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
ret i32 %x
}
define ptx_device i32 @test_gridid() {
; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.gridid()
%x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
ret i32 %x
}
define ptx_device i32 @test_lanemask_eq() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.eq()
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
ret i32 %x
}
define ptx_device i32 @test_lanemask_le() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.le()
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
ret i32 %x
}
define ptx_device i32 @test_lanemask_lt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.lt()
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
ret i32 %x
}
define ptx_device i32 @test_lanemask_ge() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.ge()
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
ret i32 %x
}
define ptx_device i32 @test_lanemask_gt() {
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.gt()
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
ret i32 %x
}
define ptx_device i32 @test_clock() {
; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.clock()
%x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
ret i32 %x
}
define ptx_device i64 @test_clock64() {
; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
; CHECK: ret;
%x = call i64 @llvm.ptx.read.clock64()
%x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
ret i64 %x
}
define ptx_device i32 @test_pm0() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm0()
%x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
ret i32 %x
}
define ptx_device i32 @test_pm1() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm1()
%x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
ret i32 %x
}
define ptx_device i32 @test_pm2() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm2()
%x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
ret i32 %x
}
define ptx_device i32 @test_pm3() {
; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm3()
%x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
ret i32 %x
}
define ptx_device void @test_bar_sync() {
; CHECK: bar.sync 0
; CHECK: ret;
call void @llvm.ptx.bar.sync(i32 0)
call void @llvm.nvvm.bar.sync(i32 0)
ret void
}
declare i32 @llvm.ptx.read.tid.x()
declare i32 @llvm.ptx.read.tid.y()
declare i32 @llvm.ptx.read.tid.z()
declare i32 @llvm.ptx.read.tid.w()
declare i32 @llvm.ptx.read.ntid.x()
declare i32 @llvm.ptx.read.ntid.y()
declare i32 @llvm.ptx.read.ntid.z()
declare i32 @llvm.ptx.read.ntid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
declare i32 @llvm.ptx.read.laneid()
declare i32 @llvm.ptx.read.warpid()
declare i32 @llvm.ptx.read.nwarpid()
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
declare i32 @llvm.ptx.read.ctaid.x()
declare i32 @llvm.ptx.read.ctaid.y()
declare i32 @llvm.ptx.read.ctaid.z()
declare i32 @llvm.ptx.read.ctaid.w()
declare i32 @llvm.ptx.read.nctaid.x()
declare i32 @llvm.ptx.read.nctaid.y()
declare i32 @llvm.ptx.read.nctaid.z()
declare i32 @llvm.ptx.read.nctaid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
declare i32 @llvm.ptx.read.smid()
declare i32 @llvm.ptx.read.nsmid()
declare i32 @llvm.ptx.read.gridid()
declare i32 @llvm.nvvm.read.ptx.sreg.smid()
declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
declare i32 @llvm.nvvm.read.ptx.sreg.gridid()
declare i32 @llvm.ptx.read.lanemask.eq()
declare i32 @llvm.ptx.read.lanemask.le()
declare i32 @llvm.ptx.read.lanemask.lt()
declare i32 @llvm.ptx.read.lanemask.ge()
declare i32 @llvm.ptx.read.lanemask.gt()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
declare i32 @llvm.ptx.read.clock()
declare i64 @llvm.ptx.read.clock64()
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
declare i32 @llvm.ptx.read.pm0()
declare i32 @llvm.ptx.read.pm1()
declare i32 @llvm.ptx.read.pm2()
declare i32 @llvm.ptx.read.pm3()
declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
declare i32 @llvm.nvvm.read.ptx.sreg.pm3()
declare void @llvm.ptx.bar.sync(i32 %i)
declare void @llvm.nvvm.bar.sync(i32 %i)
; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}