forked from OSchip/llvm-project
NVPTX: Remove the legacy ptx intrinsics
- Rename the ptx.read.* intrinsics to nvvm.read.ptx.sreg.* - some but not all of these registers were already accessible via the nvvm name.
- Rename ptx.bar.sync to nvvm.bar.sync, to match nvvm.bar0.
There's a fair amount of code motion here, but it's all very mechanical.
llvm-svn: 274769
This commit is contained in:
parent
3972953efd
commit
a466cc33fa
|
@ -738,6 +738,10 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
|
|||
def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrConvergent]>;
|
||||
|
||||
def int_nvvm_bar_sync :
|
||||
Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
|
||||
GCCBuiltin<"__nvvm_bar_sync">;
|
||||
|
||||
// Membar
|
||||
def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
|
||||
Intrinsic<[], [], []>;
|
||||
|
@ -746,53 +750,6 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
|
|||
def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
|
||||
Intrinsic<[], [], []>;
|
||||
|
||||
|
||||
// Accessing special registers
|
||||
def int_nvvm_read_ptx_sreg_tid_x :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
|
||||
def int_nvvm_read_ptx_sreg_tid_y :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
|
||||
def int_nvvm_read_ptx_sreg_tid_z :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_ntid_x :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
|
||||
def int_nvvm_read_ptx_sreg_ntid_y :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
|
||||
def int_nvvm_read_ptx_sreg_ntid_z :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_ctaid_x :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
|
||||
def int_nvvm_read_ptx_sreg_ctaid_y :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
|
||||
def int_nvvm_read_ptx_sreg_ctaid_z :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_nctaid_x :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
|
||||
def int_nvvm_read_ptx_sreg_nctaid_y :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
|
||||
def int_nvvm_read_ptx_sreg_nctaid_z :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
|
||||
|
||||
def int_nvvm_read_ptx_sreg_warpsize :
|
||||
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
|
||||
|
||||
|
||||
// Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the
|
||||
// pointer's alignment.
|
||||
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
|
||||
|
@ -3666,9 +3623,8 @@ def int_nvvm_swap_lo_hi_b64
|
|||
GCCBuiltin<"__nvvm_swap_lo_hi_b64">;
|
||||
|
||||
|
||||
// Old PTX back-end intrinsics retained here for backwards-compatibility
|
||||
|
||||
multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
|
||||
// Accessing special registers.
|
||||
multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
|
||||
// FIXME: Do we need the 128-bit integer type version?
|
||||
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
|
||||
|
||||
|
@ -3676,74 +3632,57 @@ multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
|
|||
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
|
||||
|
||||
def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_x")>;
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
|
||||
def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_y")>;
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
|
||||
def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_z")>;
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
|
||||
def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_w")>;
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
|
||||
}
|
||||
|
||||
class PTXReadSpecialRegisterIntrinsic_r32<string name>
|
||||
class PTXReadSRegIntrinsic_r32<string name>
|
||||
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<name>;
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
|
||||
|
||||
class PTXReadSpecialRegisterIntrinsic_r64<string name>
|
||||
class PTXReadSRegIntrinsic_r64<string name>
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<name>;
|
||||
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
|
||||
|
||||
defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_tid">;
|
||||
defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_ntid">;
|
||||
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
|
||||
defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
|
||||
|
||||
def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_laneid">;
|
||||
def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_warpid">;
|
||||
def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_nwarpid">;
|
||||
def int_nvvm_read_ptx_sreg_laneid : PTXReadSRegIntrinsic_r32<"laneid">;
|
||||
def int_nvvm_read_ptx_sreg_warpid : PTXReadSRegIntrinsic_r32<"warpid">;
|
||||
def int_nvvm_read_ptx_sreg_nwarpid : PTXReadSRegIntrinsic_r32<"nwarpid">;
|
||||
|
||||
defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_ctaid">;
|
||||
defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_nctaid">;
|
||||
defm int_nvvm_read_ptx_sreg_ctaid : PTXReadSRegIntrinsic_v4i32<"ctaid">;
|
||||
defm int_nvvm_read_ptx_sreg_nctaid : PTXReadSRegIntrinsic_v4i32<"nctaid">;
|
||||
|
||||
def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_smid">;
|
||||
def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_nsmid">;
|
||||
def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_gridid">;
|
||||
def int_nvvm_read_ptx_sreg_smid : PTXReadSRegIntrinsic_r32<"smid">;
|
||||
def int_nvvm_read_ptx_sreg_nsmid : PTXReadSRegIntrinsic_r32<"nsmid">;
|
||||
def int_nvvm_read_ptx_sreg_gridid : PTXReadSRegIntrinsic_r32<"gridid">;
|
||||
|
||||
def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_eq">;
|
||||
def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_le">;
|
||||
def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_lt">;
|
||||
def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_ge">;
|
||||
def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_gt">;
|
||||
def int_nvvm_read_ptx_sreg_lanemask_eq :
|
||||
PTXReadSRegIntrinsic_r32<"lanemask_eq">;
|
||||
def int_nvvm_read_ptx_sreg_lanemask_le :
|
||||
PTXReadSRegIntrinsic_r32<"lanemask_le">;
|
||||
def int_nvvm_read_ptx_sreg_lanemask_lt :
|
||||
PTXReadSRegIntrinsic_r32<"lanemask_lt">;
|
||||
def int_nvvm_read_ptx_sreg_lanemask_ge :
|
||||
PTXReadSRegIntrinsic_r32<"lanemask_ge">;
|
||||
def int_nvvm_read_ptx_sreg_lanemask_gt :
|
||||
PTXReadSRegIntrinsic_r32<"lanemask_gt">;
|
||||
|
||||
def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_clock">;
|
||||
def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
|
||||
<"__builtin_ptx_read_clock64">;
|
||||
def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
|
||||
def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
|
||||
|
||||
def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm0">;
|
||||
def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm1">;
|
||||
def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm2">;
|
||||
def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm3">;
|
||||
def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
|
||||
def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
|
||||
def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
|
||||
def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
|
||||
|
||||
def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], [IntrConvergent]>,
|
||||
GCCBuiltin<"__builtin_ptx_bar_sync">;
|
||||
def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
|
||||
|
||||
//
|
||||
// SHUFFLE
|
||||
|
|
|
@ -62,6 +62,9 @@ def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
|
|||
!strconcat("}}", ""))))))),
|
||||
[(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
|
||||
|
||||
def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
|
||||
[(int_nvvm_bar_sync imm:$i)]>;
|
||||
|
||||
// shfl.{up,down,bfly,idx}.b32
|
||||
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
|
||||
// The last two parameters to shfl can be regs or imms. ptxas is smart
|
||||
|
@ -1375,44 +1378,6 @@ defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
|
|||
".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
|
||||
|
||||
|
||||
//-----------------------------------
|
||||
// Read Special Registers
|
||||
//-----------------------------------
|
||||
class F_SREG<string OpStr, NVPTXRegClass regclassOut, Intrinsic IntOp> :
|
||||
NVPTXInst<(outs regclassOut:$dst), (ins),
|
||||
OpStr,
|
||||
[(set regclassOut:$dst, (IntOp))]>;
|
||||
|
||||
def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_tid_x>;
|
||||
def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_tid_y>;
|
||||
def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_tid_z>;
|
||||
|
||||
def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_ntid_x>;
|
||||
def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_ntid_y>;
|
||||
def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_ntid_z>;
|
||||
|
||||
def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_ctaid_x>;
|
||||
def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_ctaid_y>;
|
||||
def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_ctaid_z>;
|
||||
|
||||
def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_nctaid_x>;
|
||||
def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_nctaid_y>;
|
||||
def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_nctaid_z>;
|
||||
|
||||
def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
|
||||
int_nvvm_read_ptx_sreg_warpsize>;
|
||||
|
||||
|
||||
//-----------------------------------
|
||||
|
@ -7005,98 +6970,95 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
|
|||
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
||||
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
||||
|
||||
//-----------------------------------
|
||||
// Read Special Registers
|
||||
//-----------------------------------
|
||||
|
||||
|
||||
//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
|
||||
|
||||
// These intrinsics are handled to retain compatibility with the old backend.
|
||||
|
||||
// PTX Special Purpose Register Accessor Intrinsics
|
||||
|
||||
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
|
||||
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
|
||||
: NVPTXInst<(outs Int64Regs:$d), (ins),
|
||||
!strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
|
||||
[(set Int64Regs:$d, (intop))]>;
|
||||
|
||||
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
|
||||
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
|
||||
: NVPTXInst<(outs Int32Regs:$d), (ins),
|
||||
!strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
|
||||
[(set Int32Regs:$d, (intop))]>;
|
||||
|
||||
// TODO Add read vector-version of special registers
|
||||
|
||||
def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
|
||||
int_ptx_read_tid_x>;
|
||||
def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
|
||||
int_ptx_read_tid_y>;
|
||||
def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
|
||||
int_ptx_read_tid_z>;
|
||||
def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
|
||||
int_ptx_read_tid_w>;
|
||||
def INT_PTX_SREG_TID_X :
|
||||
PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
|
||||
def INT_PTX_SREG_TID_Y :
|
||||
PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
|
||||
def INT_PTX_SREG_TID_Z :
|
||||
PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
|
||||
def INT_PTX_SREG_TID_W :
|
||||
PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
|
||||
|
||||
def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
|
||||
int_ptx_read_ntid_x>;
|
||||
def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
|
||||
int_ptx_read_ntid_y>;
|
||||
def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
|
||||
int_ptx_read_ntid_z>;
|
||||
def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
|
||||
int_ptx_read_ntid_w>;
|
||||
def INT_PTX_SREG_NTID_X :
|
||||
PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
|
||||
def INT_PTX_SREG_NTID_Y :
|
||||
PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
|
||||
def INT_PTX_SREG_NTID_Z :
|
||||
PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
|
||||
def INT_PTX_SREG_NTID_W :
|
||||
PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
|
||||
|
||||
def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
|
||||
int_ptx_read_laneid>;
|
||||
def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
|
||||
int_ptx_read_warpid>;
|
||||
def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
|
||||
int_ptx_read_nwarpid>;
|
||||
def INT_PTX_SREG_LANEID :
|
||||
PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
|
||||
def INT_PTX_SREG_WARPID :
|
||||
PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
|
||||
def INT_PTX_SREG_NWARPID :
|
||||
PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
|
||||
|
||||
def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
|
||||
int_ptx_read_ctaid_x>;
|
||||
def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
|
||||
int_ptx_read_ctaid_y>;
|
||||
def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
|
||||
int_ptx_read_ctaid_z>;
|
||||
def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
|
||||
int_ptx_read_ctaid_w>;
|
||||
def INT_PTX_SREG_CTAID_X :
|
||||
PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
|
||||
def INT_PTX_SREG_CTAID_Y :
|
||||
PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
|
||||
def INT_PTX_SREG_CTAID_Z :
|
||||
PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
|
||||
def INT_PTX_SREG_CTAID_W :
|
||||
PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
|
||||
|
||||
def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
|
||||
int_ptx_read_nctaid_x>;
|
||||
def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
|
||||
int_ptx_read_nctaid_y>;
|
||||
def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
|
||||
int_ptx_read_nctaid_z>;
|
||||
def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
|
||||
int_ptx_read_nctaid_w>;
|
||||
def INT_PTX_SREG_NCTAID_X :
|
||||
PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
|
||||
def INT_PTX_SREG_NCTAID_Y :
|
||||
PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
|
||||
def INT_PTX_SREG_NCTAID_Z :
|
||||
PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
|
||||
def INT_PTX_SREG_NCTAID_W :
|
||||
PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
|
||||
|
||||
def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
|
||||
int_ptx_read_smid>;
|
||||
def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
|
||||
int_ptx_read_nsmid>;
|
||||
def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
|
||||
int_ptx_read_gridid>;
|
||||
def INT_PTX_SREG_SMID :
|
||||
PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
|
||||
def INT_PTX_SREG_NSMID :
|
||||
PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
|
||||
def INT_PTX_SREG_GRIDID :
|
||||
PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
|
||||
|
||||
def PTX_READ_LANEMASK_EQ
|
||||
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
|
||||
def PTX_READ_LANEMASK_LE
|
||||
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
|
||||
def PTX_READ_LANEMASK_LT
|
||||
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
|
||||
def PTX_READ_LANEMASK_GE
|
||||
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
|
||||
def PTX_READ_LANEMASK_GT
|
||||
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
|
||||
def INT_PTX_SREG_LANEMASK_EQ :
|
||||
PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
|
||||
def INT_PTX_SREG_LANEMASK_LE :
|
||||
PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
|
||||
def INT_PTX_SREG_LANEMASK_LT :
|
||||
PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
|
||||
def INT_PTX_SREG_LANEMASK_GE :
|
||||
PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
|
||||
def INT_PTX_SREG_LANEMASK_GT :
|
||||
PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
|
||||
|
||||
def PTX_READ_CLOCK
|
||||
: PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
|
||||
def PTX_READ_CLOCK64
|
||||
: PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
|
||||
def INT_PTX_SREG_CLOCK :
|
||||
PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
|
||||
def INT_PTX_SREG_CLOCK64 :
|
||||
PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
|
||||
|
||||
def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
|
||||
def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
|
||||
def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
|
||||
def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
|
||||
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
|
||||
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
|
||||
def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
|
||||
def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
|
||||
|
||||
// PTX Parallel Synchronization and Communication Intrinsics
|
||||
|
||||
def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
|
||||
[(int_ptx_bar_sync imm:$i)]>;
|
||||
// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
|
||||
// handle the constant.
|
||||
def INT_PTX_SREG_WARPSIZE :
|
||||
NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
|
||||
[(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
|
||||
|
|
|
@ -32,7 +32,7 @@ static bool readsThreadIndex(const IntrinsicInst *II) {
|
|||
}
|
||||
|
||||
static bool readsLaneId(const IntrinsicInst *II) {
|
||||
return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
|
||||
return II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_laneid;
|
||||
}
|
||||
|
||||
// Whether the given intrinsic is an atomic instruction in PTX.
|
||||
|
|
|
@ -85,57 +85,45 @@ bool NVVMIntrRange::runOnFunction(Function &F) {
|
|||
if (Function *Callee = Call->getCalledFunction()) {
|
||||
switch (Callee->getIntrinsicID()) {
|
||||
// Index within block
|
||||
case Intrinsic::ptx_read_tid_x:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_tid_x:
|
||||
Changed |= addRangeMetadata(0, MaxBlockSize.x, Call);
|
||||
break;
|
||||
case Intrinsic::ptx_read_tid_y:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_tid_y:
|
||||
Changed |= addRangeMetadata(0, MaxBlockSize.y, Call);
|
||||
break;
|
||||
case Intrinsic::ptx_read_tid_z:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_tid_z:
|
||||
Changed |= addRangeMetadata(0, MaxBlockSize.z, Call);
|
||||
break;
|
||||
|
||||
// Block size
|
||||
case Intrinsic::ptx_read_ntid_x:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
|
||||
Changed |= addRangeMetadata(1, MaxBlockSize.x+1, Call);
|
||||
break;
|
||||
case Intrinsic::ptx_read_ntid_y:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
|
||||
Changed |= addRangeMetadata(1, MaxBlockSize.y+1, Call);
|
||||
break;
|
||||
case Intrinsic::ptx_read_ntid_z:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
|
||||
Changed |= addRangeMetadata(1, MaxBlockSize.z+1, Call);
|
||||
break;
|
||||
|
||||
// Index within grid
|
||||
case Intrinsic::ptx_read_ctaid_x:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
|
||||
Changed |= addRangeMetadata(0, MaxGridSize.x, Call);
|
||||
break;
|
||||
case Intrinsic::ptx_read_ctaid_y:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
|
||||
Changed |= addRangeMetadata(0, MaxGridSize.y, Call);
|
||||
break;
|
||||
case Intrinsic::ptx_read_ctaid_z:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
|
||||
Changed |= addRangeMetadata(0, MaxGridSize.z, Call);
|
||||
break;
|
||||
|
||||
// Grid size
|
||||
case Intrinsic::ptx_read_nctaid_x:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
|
||||
Changed |= addRangeMetadata(1, MaxGridSize.x+1, Call);
|
||||
break;
|
||||
case Intrinsic::ptx_read_nctaid_y:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
|
||||
Changed |= addRangeMetadata(1, MaxGridSize.y+1, Call);
|
||||
break;
|
||||
case Intrinsic::ptx_read_nctaid_z:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
|
||||
Changed |= addRangeMetadata(1, MaxGridSize.z+1, Call);
|
||||
break;
|
||||
|
@ -146,7 +134,7 @@ bool NVVMIntrRange::runOnFunction(Function &F) {
|
|||
break;
|
||||
|
||||
// Lane ID is [0..warpsize)
|
||||
case Intrinsic::ptx_read_laneid:
|
||||
case Intrinsic::nvvm_read_ptx_sreg_laneid:
|
||||
Changed |= addRangeMetadata(0, 32, Call);
|
||||
break;
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ merge:
|
|||
define i32 @loop() {
|
||||
; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'loop'
|
||||
entry:
|
||||
%laneid = call i32 @llvm.ptx.read.laneid()
|
||||
%laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
|
||||
br label %loop
|
||||
loop:
|
||||
%i = phi i32 [ 0, %entry ], [ %i1, %loop ]
|
||||
|
@ -208,7 +208,7 @@ bb3:
|
|||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
|
||||
declare i32 @llvm.ptx.read.laneid()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
|
||||
|
||||
!nvvm.annotations = !{!0, !1, !2, !3, !4, !5}
|
||||
!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}
|
||||
|
|
|
@ -10,10 +10,10 @@ target triple = "nvptx64-nvidia-cuda"
|
|||
define void @some_kernel(%class.float3* nocapture %dst) #0 {
|
||||
_ZL11compute_vecRK6float3jb.exit:
|
||||
%ret_vec.sroa.8.i = alloca float, align 4
|
||||
%0 = tail call i32 @llvm.ptx.read.ctaid.x()
|
||||
%1 = tail call i32 @llvm.ptx.read.ntid.x()
|
||||
%0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
%1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
%2 = mul nsw i32 %1, %0
|
||||
%3 = tail call i32 @llvm.ptx.read.tid.x()
|
||||
%3 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
%4 = add nsw i32 %2, %3
|
||||
%5 = zext i32 %4 to i64
|
||||
%6 = bitcast float* %ret_vec.sroa.8.i to i8*
|
||||
|
@ -37,13 +37,13 @@ _ZL11compute_vecRK6float3jb.exit:
|
|||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.ptx.read.ctaid.x() #1
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.ptx.read.ntid.x() #1
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #1
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare i32 @llvm.ptx.read.tid.x() #1
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.lifetime.start(i64, i8* nocapture) #2
|
||||
|
|
|
@ -8,71 +8,71 @@
|
|||
|
||||
define ptx_device i32 @test_tid_x() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
|
||||
; RANGE: call i32 @llvm.ptx.read.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.x()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_y() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
|
||||
; RANGE: call i32 @llvm.ptx.read.tid.y(), !range ![[BLK_IDX_XY]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.y(), !range ![[BLK_IDX_XY]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.y()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_z() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
|
||||
; RANGE: call i32 @llvm.ptx.read.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.z()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_w() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.w()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_x() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
|
||||
; RANGE: call i32 @llvm.ptx.read.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.x()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_y() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
|
||||
; RANGE: call i32 @llvm.ptx.read.ntid.y(), !range ![[BLK_SIZE_XY]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y(), !range ![[BLK_SIZE_XY]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.y()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_z() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
|
||||
; RANGE: call i32 @llvm.ptx.read.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.z()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_w() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.w()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_laneid() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
|
||||
; RANGE: call i32 @llvm.ptx.read.laneid(), !range ![[LANEID:[0-9]+]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.laneid(), !range ![[LANEID:[0-9]+]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.laneid()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.laneid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
|
@ -87,71 +87,71 @@ define ptx_device i32 @test_warpsize() {
|
|||
define ptx_device i32 @test_warpid() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.warpid()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.warpid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nwarpid() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nwarpid()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_y() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
|
||||
; RANGE: call i32 @llvm.ptx.read.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.y()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_z() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
|
||||
; RANGE: call i32 @llvm.ptx.read.ctaid.z(), !range ![[GRID_IDX_YZ]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z(), !range ![[GRID_IDX_YZ]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.z()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_x() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
|
||||
; RANGE_30: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
|
||||
; RANGE_20: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_YZ]]
|
||||
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
|
||||
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[GRID_IDX_YZ]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.x()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_w() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.w()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_y() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
|
||||
; RANGE: call i32 @llvm.ptx.read.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.y()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_z() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
|
||||
; RANGE: call i32 @llvm.ptx.read.nctaid.z(), !range ![[GRID_SIZE_YZ]]
|
||||
; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z(), !range ![[GRID_SIZE_YZ]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.z()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_x() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
|
||||
; RANGE_30: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
|
||||
; RANGE_20: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_YZ]]
|
||||
; RANGE_30: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
|
||||
; RANGE_20: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x(), !range ![[GRID_SIZE_YZ]]
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.x()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
|
@ -159,157 +159,157 @@ define ptx_device i32 @test_nctaid_x() {
|
|||
define ptx_device i32 @test_nctaid_w() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.w()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_smid() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.smid()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.smid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nsmid() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nsmid()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_gridid() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.gridid()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.gridid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_eq() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.eq()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_le() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.le()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_lt() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.lt()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_ge() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.ge()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_gt() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.gt()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_clock() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.clock()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.clock()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @test_clock64() {
|
||||
; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
|
||||
; CHECK: ret;
|
||||
%x = call i64 @llvm.ptx.read.clock64()
|
||||
%x = call i64 @llvm.nvvm.read.ptx.sreg.clock64()
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm0() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm0()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.pm0()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm1() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm1()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.pm1()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm2() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm2()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.pm2()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm3() {
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm3()
|
||||
%x = call i32 @llvm.nvvm.read.ptx.sreg.pm3()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device void @test_bar_sync() {
|
||||
; CHECK: bar.sync 0
|
||||
; CHECK: ret;
|
||||
call void @llvm.ptx.bar.sync(i32 0)
|
||||
call void @llvm.nvvm.bar.sync(i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.ptx.read.tid.x()
|
||||
declare i32 @llvm.ptx.read.tid.y()
|
||||
declare i32 @llvm.ptx.read.tid.z()
|
||||
declare i32 @llvm.ptx.read.tid.w()
|
||||
declare i32 @llvm.ptx.read.ntid.x()
|
||||
declare i32 @llvm.ptx.read.ntid.y()
|
||||
declare i32 @llvm.ptx.read.ntid.z()
|
||||
declare i32 @llvm.ptx.read.ntid.w()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.tid.w()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
|
||||
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
|
||||
declare i32 @llvm.ptx.read.laneid()
|
||||
declare i32 @llvm.ptx.read.warpid()
|
||||
declare i32 @llvm.ptx.read.nwarpid()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.laneid()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.warpid()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
|
||||
|
||||
declare i32 @llvm.ptx.read.ctaid.x()
|
||||
declare i32 @llvm.ptx.read.ctaid.y()
|
||||
declare i32 @llvm.ptx.read.ctaid.z()
|
||||
declare i32 @llvm.ptx.read.ctaid.w()
|
||||
declare i32 @llvm.ptx.read.nctaid.x()
|
||||
declare i32 @llvm.ptx.read.nctaid.y()
|
||||
declare i32 @llvm.ptx.read.nctaid.z()
|
||||
declare i32 @llvm.ptx.read.nctaid.w()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
|
||||
|
||||
declare i32 @llvm.ptx.read.smid()
|
||||
declare i32 @llvm.ptx.read.nsmid()
|
||||
declare i32 @llvm.ptx.read.gridid()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.smid()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.nsmid()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.gridid()
|
||||
|
||||
declare i32 @llvm.ptx.read.lanemask.eq()
|
||||
declare i32 @llvm.ptx.read.lanemask.le()
|
||||
declare i32 @llvm.ptx.read.lanemask.lt()
|
||||
declare i32 @llvm.ptx.read.lanemask.ge()
|
||||
declare i32 @llvm.ptx.read.lanemask.gt()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
|
||||
|
||||
declare i32 @llvm.ptx.read.clock()
|
||||
declare i64 @llvm.ptx.read.clock64()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
|
||||
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
|
||||
|
||||
declare i32 @llvm.ptx.read.pm0()
|
||||
declare i32 @llvm.ptx.read.pm1()
|
||||
declare i32 @llvm.ptx.read.pm2()
|
||||
declare i32 @llvm.ptx.read.pm3()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.pm0()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.pm1()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.pm2()
|
||||
declare i32 @llvm.nvvm.read.ptx.sreg.pm3()
|
||||
|
||||
declare void @llvm.ptx.bar.sync(i32 %i)
|
||||
declare void @llvm.nvvm.bar.sync(i32 %i)
|
||||
|
||||
; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
|
||||
; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
|
||||
|
|
Loading…
Reference in New Issue