forked from OSchip/llvm-project
PTX: Set PTX 2.0 as the minimum supported version
- Remove PTX 1.4 code generation
- Change type of intrinsics to .v4.i32 instead of .v4.i16
- Add and/or/xor integer instructions

llvm-svn: 127677
This commit is contained in:
parent
c2631d26c0
commit
94751fbf32
|
@ -12,14 +12,17 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let TargetPrefix = "ptx" in {
|
||||
// FIXME Since PTX 2.0, special registers are redefined as v4i32 type
|
||||
multiclass PTXReadSpecialRegisterIntrinsic_v4i16 {
|
||||
def _r64 : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
|
||||
def _v4i16 : Intrinsic<[llvm_v4i16_ty], [], [IntrNoMem]>;
|
||||
def _x : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>;
|
||||
def _y : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>;
|
||||
def _z : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>;
|
||||
def _w : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>;
|
||||
multiclass PTXReadSpecialRegisterIntrinsic_v4i32 {
|
||||
// FIXME: Do we need the 128-bit integer type version?
|
||||
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
|
||||
|
||||
// FIXME: Enable this once v4i32 support is enabled in back-end.
|
||||
// def _v4i32 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
|
||||
|
||||
def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
|
||||
def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
|
||||
def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
|
||||
def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
class PTXReadSpecialRegisterIntrinsic_r32
|
||||
|
@ -29,15 +32,15 @@ let TargetPrefix = "ptx" in {
|
|||
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i16;
|
||||
defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i16;
|
||||
defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32;
|
||||
defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32;
|
||||
|
||||
def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32;
|
||||
def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32;
|
||||
def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32;
|
||||
|
||||
defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i16;
|
||||
defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i16;
|
||||
defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32;
|
||||
defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32;
|
||||
|
||||
def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32;
|
||||
def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32;
|
||||
|
|
|
@ -29,17 +29,18 @@ def Feature64Bit : SubtargetFeature<"64bit", "Use64BitAddresses", "true",
|
|||
|
||||
//===- PTX Version --------------------------------------------------------===//
|
||||
|
||||
def FeaturePTX14 : SubtargetFeature<"ptx14", "PTXVersion", "PTX_VERSION_1_4",
|
||||
"Use PTX Language Version 1.4">;
|
||||
|
||||
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
|
||||
"Use PTX Language Version 2.0",
|
||||
[FeaturePTX14]>;
|
||||
[]>;
|
||||
|
||||
def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
|
||||
"Use PTX Language Version 2.1",
|
||||
[FeaturePTX20]>;
|
||||
|
||||
def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
|
||||
"Use PTX Language Version 2.2",
|
||||
[FeaturePTX21]>;
|
||||
|
||||
//===- PTX Shader Model ---------------------------------------------------===//
|
||||
|
||||
def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
|
||||
|
|
|
@ -32,10 +32,11 @@ def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">;
|
|||
def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
|
||||
|
||||
// PTX Version Support
|
||||
def SupportsPTX20 : Predicate<"getSubtarget().supportsPTX20()">;
|
||||
def DoesNotSupportPTX20 : Predicate<"!getSubtarget().supportsPTX20()">;
|
||||
def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
|
||||
def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
|
||||
def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">;
|
||||
def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Pattern Stuff
|
||||
|
@ -253,6 +254,33 @@ multiclass INT3<string opcstr, SDNode opnode> {
|
|||
[(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
|
||||
}
|
||||
|
||||
multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
|
||||
def rr16 : InstPTX<(outs RRegu16:$d),
|
||||
(ins RRegu16:$a, RRegu16:$b),
|
||||
!strconcat(opcstr, ".b16\t$d, $a, $b"),
|
||||
[(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
|
||||
def ri16 : InstPTX<(outs RRegu16:$d),
|
||||
(ins RRegu16:$a, i16imm:$b),
|
||||
!strconcat(opcstr, ".b16\t$d, $a, $b"),
|
||||
[(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
|
||||
def rr32 : InstPTX<(outs RRegu32:$d),
|
||||
(ins RRegu32:$a, RRegu32:$b),
|
||||
!strconcat(opcstr, ".b32\t$d, $a, $b"),
|
||||
[(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
|
||||
def ri32 : InstPTX<(outs RRegu32:$d),
|
||||
(ins RRegu32:$a, i32imm:$b),
|
||||
!strconcat(opcstr, ".b32\t$d, $a, $b"),
|
||||
[(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
|
||||
def rr64 : InstPTX<(outs RRegu64:$d),
|
||||
(ins RRegu64:$a, RRegu64:$b),
|
||||
!strconcat(opcstr, ".b64\t$d, $a, $b"),
|
||||
[(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
|
||||
def ri64 : InstPTX<(outs RRegu64:$d),
|
||||
(ins RRegu64:$a, i64imm:$b),
|
||||
!strconcat(opcstr, ".b64\t$d, $a, $b"),
|
||||
[(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
|
||||
}
|
||||
|
||||
// no %type directive, non-commutable
|
||||
multiclass INT3ntnc<string opcstr, SDNode opnode> {
|
||||
def rr : InstPTX<(outs RRegu32:$d),
|
||||
|
@ -359,6 +387,7 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
|
|||
|
||||
defm ADD : INT3<"add", add>;
|
||||
defm SUB : INT3<"sub", sub>;
|
||||
defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
|
||||
|
||||
///===- Floating-Point Arithmetic Instructions ----------------------------===//
|
||||
|
||||
|
@ -462,6 +491,10 @@ defm SHL : INT3ntnc<"shl.b32", PTXshl>;
|
|||
defm SRL : INT3ntnc<"shr.u32", PTXsrl>;
|
||||
defm SRA : INT3ntnc<"shr.s32", PTXsra>;
|
||||
|
||||
defm AND : PTX_LOGIC<"and", and>;
|
||||
defm OR : PTX_LOGIC<"or", or>;
|
||||
defm XOR : PTX_LOGIC<"xor", xor>;
|
||||
|
||||
///===- Data Movement and Conversion Instructions -------------------------===//
|
||||
|
||||
let neverHasSideEffects = 1 in {
|
||||
|
|
|
@ -23,40 +23,35 @@ class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
|
|||
!strconcat("mov.u32\t$d, %", regname),
|
||||
[(set RRegu32:$d, (intop))]>;
|
||||
|
||||
class PTX_READ_SPECIAL_SUB_REGISTER<string regname, Intrinsic intop>
|
||||
: InstPTX<(outs RRegu16:$d), (ins),
|
||||
!strconcat("mov.u16\t$d, %", regname),
|
||||
[(set RRegu16:$d, (intop))]>;
|
||||
|
||||
// TODO Add read vector-version of special registers
|
||||
|
||||
def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
|
||||
def PTX_READ_TID_X : PTX_READ_SPECIAL_SUB_REGISTER<"tid.x", int_ptx_read_tid_x>;
|
||||
def PTX_READ_TID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"tid.y", int_ptx_read_tid_y>;
|
||||
def PTX_READ_TID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"tid.z", int_ptx_read_tid_z>;
|
||||
def PTX_READ_TID_W : PTX_READ_SPECIAL_SUB_REGISTER<"tid.w", int_ptx_read_tid_w>;
|
||||
//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
|
||||
def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
|
||||
def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
|
||||
def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
|
||||
def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
|
||||
|
||||
def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
|
||||
def PTX_READ_NTID_X : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.x", int_ptx_read_ntid_x>;
|
||||
def PTX_READ_NTID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.y", int_ptx_read_ntid_y>;
|
||||
def PTX_READ_NTID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.z", int_ptx_read_ntid_z>;
|
||||
def PTX_READ_NTID_W : PTX_READ_SPECIAL_SUB_REGISTER<"ntid.w", int_ptx_read_ntid_w>;
|
||||
//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
|
||||
def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
|
||||
def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
|
||||
def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
|
||||
def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
|
||||
|
||||
def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
|
||||
def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
|
||||
def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
|
||||
|
||||
def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
|
||||
def PTX_READ_CTAID_X : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.x", int_ptx_read_ctaid_x>;
|
||||
def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.y", int_ptx_read_ctaid_y>;
|
||||
def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.z", int_ptx_read_ctaid_z>;
|
||||
def PTX_READ_CTAID_W : PTX_READ_SPECIAL_SUB_REGISTER<"ctaid.w", int_ptx_read_ctaid_w>;
|
||||
//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
|
||||
def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
|
||||
def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
|
||||
def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
|
||||
def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
|
||||
|
||||
def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
|
||||
def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.x", int_ptx_read_nctaid_x>;
|
||||
def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.y", int_ptx_read_nctaid_y>;
|
||||
def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.z", int_ptx_read_nctaid_z>;
|
||||
def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_SUB_REGISTER<"nctaid.w", int_ptx_read_nctaid_w>;
|
||||
//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
|
||||
def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
|
||||
def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
|
||||
def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
|
||||
def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
|
||||
|
||||
def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
|
||||
def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
|
||||
|
|
|
@ -18,7 +18,7 @@ using namespace llvm;
|
|||
|
||||
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS)
|
||||
: PTXShaderModel(PTX_SM_1_0),
|
||||
PTXVersion(PTX_VERSION_1_4),
|
||||
PTXVersion(PTX_VERSION_2_0),
|
||||
SupportsDouble(false),
|
||||
Use64BitAddresses(false) {
|
||||
std::string TARGET = "generic";
|
||||
|
@ -37,9 +37,9 @@ std::string PTXSubtarget::getTargetString() const {
|
|||
std::string PTXSubtarget::getPTXVersionString() const {
|
||||
switch(PTXVersion) {
|
||||
default: llvm_unreachable("Unknown PTX version");
|
||||
case PTX_VERSION_1_4: return "1.4";
|
||||
case PTX_VERSION_2_0: return "2.0";
|
||||
case PTX_VERSION_2_1: return "2.1";
|
||||
case PTX_VERSION_2_2: return "2.2";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,16 +19,25 @@
|
|||
namespace llvm {
|
||||
class PTXSubtarget : public TargetSubtarget {
|
||||
private:
|
||||
|
||||
/**
|
||||
* Enumeration of Shader Models supported by the back-end.
|
||||
*/
|
||||
enum PTXShaderModelEnum {
|
||||
PTX_SM_1_0,
|
||||
PTX_SM_1_3,
|
||||
PTX_SM_2_0
|
||||
PTX_SM_1_0, /**< Shader Model 1.0 */
|
||||
PTX_SM_1_3, /**< Shader Model 1.3 */
|
||||
PTX_SM_2_0 /**< Shader Model 2.0 */
|
||||
};
|
||||
|
||||
/**
|
||||
* Enumeration of PTX versions supported by the back-end.
|
||||
*
|
||||
* Currently, PTX 2.0 is the minimum supported version.
|
||||
*/
|
||||
enum PTXVersionEnum {
|
||||
PTX_VERSION_1_4,
|
||||
PTX_VERSION_2_0,
|
||||
PTX_VERSION_2_1
|
||||
PTX_VERSION_2_0, /**< PTX Version 2.0 */
|
||||
PTX_VERSION_2_1, /**< PTX Version 2.1 */
|
||||
PTX_VERSION_2_2 /**< PTX Version 2.2 */
|
||||
};
|
||||
|
||||
/// Shader Model supported on the target GPU.
|
||||
|
@ -58,10 +67,10 @@ namespace llvm {
|
|||
|
||||
bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
|
||||
|
||||
bool supportsPTX20() const { return PTXVersion >= PTX_VERSION_2_0; }
|
||||
|
||||
bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
|
||||
|
||||
bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
|
||||
|
||||
std::string ParseSubtargetFeatures(const std::string &FS,
|
||||
const std::string &CPU);
|
||||
}; // class PTXSubtarget
|
||||
|
|
|
@ -1,59 +1,59 @@
|
|||
; RUN: llc < %s -march=ptx -mattr=+ptx20,+sm20 | FileCheck %s
|
||||
|
||||
define ptx_device i16 @test_tid_x() {
|
||||
; CHECK: mov.u16 rh0, %tid.x;
|
||||
define ptx_device i32 @test_tid_x() {
|
||||
; CHECK: mov.u32 r0, %tid.x;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.tid.x()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.tid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_tid_y() {
|
||||
; CHECK: mov.u16 rh0, %tid.y;
|
||||
define ptx_device i32 @test_tid_y() {
|
||||
; CHECK: mov.u32 r0, %tid.y;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.tid.y()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.tid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_tid_z() {
|
||||
; CHECK: mov.u16 rh0, %tid.z;
|
||||
define ptx_device i32 @test_tid_z() {
|
||||
; CHECK: mov.u32 r0, %tid.z;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.tid.z()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.tid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_tid_w() {
|
||||
; CHECK: mov.u16 rh0, %tid.w;
|
||||
define ptx_device i32 @test_tid_w() {
|
||||
; CHECK: mov.u32 r0, %tid.w;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.tid.w()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.tid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_ntid_x() {
|
||||
; CHECK: mov.u16 rh0, %ntid.x;
|
||||
define ptx_device i32 @test_ntid_x() {
|
||||
; CHECK: mov.u32 r0, %ntid.x;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.ntid.x()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.ntid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_ntid_y() {
|
||||
; CHECK: mov.u16 rh0, %ntid.y;
|
||||
define ptx_device i32 @test_ntid_y() {
|
||||
; CHECK: mov.u32 r0, %ntid.y;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.ntid.y()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.ntid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_ntid_z() {
|
||||
; CHECK: mov.u16 rh0, %ntid.z;
|
||||
define ptx_device i32 @test_ntid_z() {
|
||||
; CHECK: mov.u32 r0, %ntid.z;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.ntid.z()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.ntid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_ntid_w() {
|
||||
; CHECK: mov.u16 rh0, %ntid.w;
|
||||
define ptx_device i32 @test_ntid_w() {
|
||||
; CHECK: mov.u32 r0, %ntid.w;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.ntid.w()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.ntid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_laneid() {
|
||||
|
@ -77,60 +77,60 @@ define ptx_device i32 @test_nwarpid() {
|
|||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_ctaid_x() {
|
||||
; CHECK: mov.u16 rh0, %ctaid.x;
|
||||
define ptx_device i32 @test_ctaid_x() {
|
||||
; CHECK: mov.u32 r0, %ctaid.x;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.ctaid.x()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.ctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_ctaid_y() {
|
||||
; CHECK: mov.u16 rh0, %ctaid.y;
|
||||
define ptx_device i32 @test_ctaid_y() {
|
||||
; CHECK: mov.u32 r0, %ctaid.y;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.ctaid.y()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.ctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_ctaid_z() {
|
||||
; CHECK: mov.u16 rh0, %ctaid.z;
|
||||
define ptx_device i32 @test_ctaid_z() {
|
||||
; CHECK: mov.u32 r0, %ctaid.z;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.ctaid.z()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.ctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_ctaid_w() {
|
||||
; CHECK: mov.u16 rh0, %ctaid.w;
|
||||
define ptx_device i32 @test_ctaid_w() {
|
||||
; CHECK: mov.u32 r0, %ctaid.w;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.ctaid.w()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.ctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_nctaid_x() {
|
||||
; CHECK: mov.u16 rh0, %nctaid.x;
|
||||
define ptx_device i32 @test_nctaid_x() {
|
||||
; CHECK: mov.u32 r0, %nctaid.x;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.nctaid.x()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.nctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_nctaid_y() {
|
||||
; CHECK: mov.u16 rh0, %nctaid.y;
|
||||
define ptx_device i32 @test_nctaid_y() {
|
||||
; CHECK: mov.u32 r0, %nctaid.y;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.nctaid.y()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.nctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_nctaid_z() {
|
||||
; CHECK: mov.u16 rh0, %nctaid.z;
|
||||
define ptx_device i32 @test_nctaid_z() {
|
||||
; CHECK: mov.u32 r0, %nctaid.z;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.nctaid.z()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.nctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @test_nctaid_w() {
|
||||
; CHECK: mov.u16 rh0, %nctaid.w;
|
||||
define ptx_device i32 @test_nctaid_w() {
|
||||
; CHECK: mov.u32 r0, %nctaid.w;
|
||||
; CHECK-NEXT: ret;
|
||||
%x = call i16 @llvm.ptx.read.nctaid.w()
|
||||
ret i16 %x
|
||||
%x = call i32 @llvm.ptx.read.nctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_smid() {
|
||||
|
@ -238,27 +238,27 @@ define ptx_device void @test_bar_sync() {
|
|||
ret void
|
||||
}
|
||||
|
||||
declare i16 @llvm.ptx.read.tid.x()
|
||||
declare i16 @llvm.ptx.read.tid.y()
|
||||
declare i16 @llvm.ptx.read.tid.z()
|
||||
declare i16 @llvm.ptx.read.tid.w()
|
||||
declare i16 @llvm.ptx.read.ntid.x()
|
||||
declare i16 @llvm.ptx.read.ntid.y()
|
||||
declare i16 @llvm.ptx.read.ntid.z()
|
||||
declare i16 @llvm.ptx.read.ntid.w()
|
||||
declare i32 @llvm.ptx.read.tid.x()
|
||||
declare i32 @llvm.ptx.read.tid.y()
|
||||
declare i32 @llvm.ptx.read.tid.z()
|
||||
declare i32 @llvm.ptx.read.tid.w()
|
||||
declare i32 @llvm.ptx.read.ntid.x()
|
||||
declare i32 @llvm.ptx.read.ntid.y()
|
||||
declare i32 @llvm.ptx.read.ntid.z()
|
||||
declare i32 @llvm.ptx.read.ntid.w()
|
||||
|
||||
declare i32 @llvm.ptx.read.laneid()
|
||||
declare i32 @llvm.ptx.read.warpid()
|
||||
declare i32 @llvm.ptx.read.nwarpid()
|
||||
|
||||
declare i16 @llvm.ptx.read.ctaid.x()
|
||||
declare i16 @llvm.ptx.read.ctaid.y()
|
||||
declare i16 @llvm.ptx.read.ctaid.z()
|
||||
declare i16 @llvm.ptx.read.ctaid.w()
|
||||
declare i16 @llvm.ptx.read.nctaid.x()
|
||||
declare i16 @llvm.ptx.read.nctaid.y()
|
||||
declare i16 @llvm.ptx.read.nctaid.z()
|
||||
declare i16 @llvm.ptx.read.nctaid.w()
|
||||
declare i32 @llvm.ptx.read.ctaid.x()
|
||||
declare i32 @llvm.ptx.read.ctaid.y()
|
||||
declare i32 @llvm.ptx.read.ctaid.z()
|
||||
declare i32 @llvm.ptx.read.ctaid.w()
|
||||
declare i32 @llvm.ptx.read.nctaid.x()
|
||||
declare i32 @llvm.ptx.read.nctaid.y()
|
||||
declare i32 @llvm.ptx.read.nctaid.z()
|
||||
declare i32 @llvm.ptx.read.nctaid.w()
|
||||
|
||||
declare i32 @llvm.ptx.read.smid()
|
||||
declare i32 @llvm.ptx.read.nsmid()
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
; RUN: llc < %s -march=ptx -mattr=ptx14 | grep ".version 1.4"
|
||||
; RUN: llc < %s -march=ptx -mattr=ptx20 | grep ".version 2.0"
|
||||
; RUN: llc < %s -march=ptx -mattr=ptx21 | grep ".version 2.1"
|
||||
; RUN: llc < %s -march=ptx -mattr=sm20 | grep ".target sm_20"
|
||||
; RUN: llc < %s -march=ptx -mattr=ptx22 | grep ".version 2.2"
|
||||
; RUN: llc < %s -march=ptx -mattr=sm10 | grep ".target sm_10"
|
||||
; RUN: llc < %s -march=ptx -mattr=sm13 | grep ".target sm_13"
|
||||
; RUN: llc < %s -march=ptx -mattr=sm20 | grep ".target sm_20"
|
||||
|
||||
define ptx_device void @t1() {
|
||||
ret void
|
||||
|
|
Loading…
Reference in New Issue