2012-05-05 04:18:50 +08:00
|
|
|
//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
|
|
|
|
|
|
|
|
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
|
|
|
|
|
|
|
|
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
|
|
|
|
|
|
|
|
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------
|
2016-06-10 04:04:08 +08:00
|
|
|
// Synchronization and shuffle functions
|
2012-05-05 04:18:50 +08:00
|
|
|
//-----------------------------------
|
2016-02-18 01:46:54 +08:00
|
|
|
let isConvergent = 1 in {
|
2012-05-05 04:18:50 +08:00
|
|
|
// Plain bar.sync forms. None of them produces a result; each is selected
// directly from the intrinsic named in its pattern.

// Barrier id hard-coded to 0 in the asm string.
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// One register operand.
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// Two register operands.
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
|
2012-05-05 04:18:50 +08:00
|
|
|
// Selected for int_nvvm_barrier0_popc. The emitted PTX opens a scope, declares
// predicate %p1, sets it to ($pred != 0), and feeds it to bar.red.popc.u32 on
// barrier 0, writing the result to $dst.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.popc.u32 \t$dst, 0, %p1; \n\t" #
                "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
|
|
|
|
// Selected for int_nvvm_barrier0_and. The emitted PTX converts $pred to
// predicate %p1 ($pred != 0), reduces it with bar.red.and.pred on barrier 0
// into %p2, and then materializes %p2 as 1 or 0 in $dst via selp.u32.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                ".reg .pred \t%p2; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.and.pred \t%p2, 0, %p1; \n\t" #
                "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
                "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;
|
|
|
|
// Selected for int_nvvm_barrier0_or. The emitted PTX converts $pred to
// predicate %p1 ($pred != 0), reduces it with bar.red.or.pred on barrier 0
// into %p2, and then materializes %p2 as 1 or 0 in $dst via selp.u32.
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                ".reg .pred \t%p2; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.or.pred \t%p2, 0, %p1; \n\t" #
                "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
                "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;
|
2016-06-10 04:04:08 +08:00
|
|
|
|
2017-01-18 08:09:36 +08:00
|
|
|
// bar.sync with an immediate operand, selected for int_nvvm_bar_sync.
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;
|
|
|
|
|
2017-09-22 02:44:49 +08:00
|
|
|
// bar.warp.sync, selected for int_nvvm_bar_warp_sync. The _I form takes the
// operand as an immediate, the _R form as a register. Both are predicated on
// hasPTX60 and hasSM30.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
|
|
|
|
|
|
|
|
// Single-operand barrier.sync, selected for int_nvvm_barrier_sync. The _I form
// takes the operand as an immediate, the _R form as a register. Both are
// predicated on hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
|
|
|
|
|
|
|
|
// Two-operand barrier.sync ($id, $cnt), selected for
// int_nvvm_barrier_sync_cnt. The two-letter suffix gives the kind of $id and
// then $cnt: R = register, I = immediate. All four forms are predicated on
// hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
|
|
|
|
|
|
|
|
|
2016-06-10 04:04:08 +08:00
|
|
|
// shfl.{up,down,bfly,idx}.b32
|
|
|
|
// Defines the four operand-kind variants of "shfl.<mode>.b32" for IntOp:
//   reg  - $offset and $mask are both registers
//   imm1 - $offset is an immediate, $mask is a register
//   imm2 - $offset is a register, $mask is an immediate
//   imm3 - $offset and $mask are both immediates
//
// ptxas is smart enough to inline constant registers, so the immediate forms
// are not strictly needed. They are easy to provide, though, and they make the
// generated PTX more readable.
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def reg : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;

  def imm1 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;

  def imm2 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;

  def imm3 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, i32imm:$offset, i32imm:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
}
|
|
|
|
|
2016-07-07 03:52:27 +08:00
|
|
|
// Instantiate SHFL for each mode (down, up, bfly, idx), once over Int32Regs
// and once over Float32Regs.
defm INT_SHFL_DOWN_I32
    : SHFL<Int32Regs,   "down", int_nvvm_shfl_down_i32>;
defm INT_SHFL_DOWN_F32
    : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
defm INT_SHFL_UP_I32
    : SHFL<Int32Regs,   "up",   int_nvvm_shfl_up_i32>;
defm INT_SHFL_UP_F32
    : SHFL<Float32Regs, "up",   int_nvvm_shfl_up_f32>;
defm INT_SHFL_BFLY_I32
    : SHFL<Int32Regs,   "bfly", int_nvvm_shfl_bfly_i32>;
defm INT_SHFL_BFLY_F32
    : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
defm INT_SHFL_IDX_I32
    : SHFL<Int32Regs,   "idx",  int_nvvm_shfl_idx_i32>;
defm INT_SHFL_IDX_F32
    : SHFL<Float32Regs, "idx",  int_nvvm_shfl_idx_f32>;
|
2016-06-10 04:04:08 +08:00
|
|
|
|
2017-09-21 05:23:07 +08:00
|
|
|
// Defines the eight operand-kind variants of "shfl.sync.<mode>.b32" for IntOp.
// The three-letter suffix gives the kind of $threadmask, $offset and $mask, in
// that order: r = register, i = immediate. Note that $threadmask comes first in
// the intrinsic's operand list but last in the asm string.
//
// ptxas is smart enough to inline constant registers, so the immediate forms
// are not strictly needed. They are easy to provide, though, and they make the
// generated PTX more readable.
multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def rrr : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           Int32Regs:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   Int32Regs:$offset, Int32Regs:$mask))]>;

  def rri : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           Int32Regs:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   Int32Regs:$offset, imm:$mask))]>;

  def rir : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           i32imm:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   imm:$offset, Int32Regs:$mask))]>;

  def rii : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           i32imm:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp Int32Regs:$threadmask, regclass:$src,
                   imm:$offset, imm:$mask))]>;

  def irr : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           Int32Regs:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   Int32Regs:$offset, Int32Regs:$mask))]>;

  def iri : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           Int32Regs:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   Int32Regs:$offset, imm:$mask))]>;

  def iir : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           i32imm:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   imm:$offset, Int32Regs:$mask))]>;

  def iii : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           i32imm:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst,
            (IntOp imm:$threadmask, regclass:$src,
                   imm:$offset, imm:$mask))]>;
}
|
|
|
|
|
|
|
|
// Instantiate SHFL_SYNC for each mode (down, up, bfly, idx), once over
// Int32Regs and once over Float32Regs.
//
// On sm_70 these don't have to be convergent, so we may eventually want to
// implement a non-convergent variant of this intrinsic.
defm INT_SHFL_SYNC_DOWN_I32
    : SHFL_SYNC<Int32Regs,   "down", int_nvvm_shfl_sync_down_i32>;
defm INT_SHFL_SYNC_DOWN_F32
    : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;
defm INT_SHFL_SYNC_UP_I32
    : SHFL_SYNC<Int32Regs,   "up",   int_nvvm_shfl_sync_up_i32>;
defm INT_SHFL_SYNC_UP_F32
    : SHFL_SYNC<Float32Regs, "up",   int_nvvm_shfl_sync_up_f32>;
defm INT_SHFL_SYNC_BFLY_I32
    : SHFL_SYNC<Int32Regs,   "bfly", int_nvvm_shfl_sync_bfly_i32>;
defm INT_SHFL_SYNC_BFLY_F32
    : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;
defm INT_SHFL_SYNC_IDX_I32
    : SHFL_SYNC<Int32Regs,   "idx",  int_nvvm_shfl_sync_idx_i32>;
defm INT_SHFL_SYNC_IDX_F32
    : SHFL_SYNC<Float32Regs, "idx",  int_nvvm_shfl_sync_idx_f32>;
|
|
|
|
|
2017-09-22 02:44:49 +08:00
|
|
|
|
|
|
|
// vote.{all,any,uni,ballot}
|
|
|
|
// Defines one anonymous instruction "vote.<mode> $dest, $pred" for IntOp.
// The input is always an Int1Regs predicate; the result register class is
// given by regclass. Predicated on hasPTX60 and hasSM30.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<
            (outs regclass:$dest),
            (ins Int1Regs:$pred),
            "vote." # mode # " \t$dest, $pred;",
            [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}
|
|
|
|
|
|
|
|
// Instantiate VOTE: all/any/uni produce a predicate (Int1Regs), ballot
// produces a 32-bit value (Int32Regs).
defm VOTE_ALL
    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY
    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI
    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT
    : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
|
|
|
|
|
|
|
|
// vote.sync.{all,any,uni,ballot}
|
|
|
|
// Defines "vote.sync.<mode> $dest, $pred, $mask" for IntOp in two forms:
//   i - $mask is an immediate
//   r - $mask is a register
// $mask comes first in the intrinsic's operand list but last in the asm
// string. Both forms are predicated on hasPTX60 and hasSM30.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<
              (outs regclass:$dest),
              (ins i32imm:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;

  def r : NVPTXInst<
              (outs regclass:$dest),
              (ins Int32Regs:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}
|
|
|
|
|
|
|
|
// Instantiate VOTE_SYNC: all/any/uni produce a predicate (Int1Regs), ballot
// produces a 32-bit value (Int32Regs).
defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs,  "all.pred",   int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs,  "any.pred",   int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs,  "uni.pred",   int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
|
|
|
|
|
2017-09-27 01:07:23 +08:00
|
|
|
// Defines "match.any.sync.<ptxtype> $dest, $value, $mask" for IntOp in four
// forms. The two-letter suffix gives the kind of $value and then $mask
// (asm-string order): i = immediate, r = register. $value uses ImmOp when it
// is an immediate and regclass when it is a register; $mask is always 32-bit.
// All forms are predicated on hasPTX60 and hasSM70.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype,
                          Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<
               (outs regclass:$dest),
               (ins i32imm:$mask, ImmOp:$value),
               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
               [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ir : NVPTXInst<
               (outs regclass:$dest),
               (ins Int32Regs:$mask, ImmOp:$value),
               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
               [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ri : NVPTXInst<
               (outs regclass:$dest),
               (ins i32imm:$mask, regclass:$value),
               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
               [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def rr : NVPTXInst<
               (outs regclass:$dest),
               (ins Int32Regs:$mask, regclass:$value),
               "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
               [(set regclass:$dest,
                     (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
|
|
|
|
|
|
|
|
// Instantiate MATCH_ANY_SYNC for 32-bit and 64-bit values.
defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
|
|
|
|
|
|
|
|
// Defines "match.all.sync.<ptxtype> $dest|$pred, $value, $mask" for IntOp in
// four forms. Each form has two results: $dest (regclass) and $pred
// (Int1Regs). The two-letter suffix gives the kind of $value and then $mask
// (asm-string order): i = immediate, r = register. All forms are predicated
// on hasPTX60 and hasSM70.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype,
                           Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<
               (outs regclass:$dest, Int1Regs:$pred),
               (ins i32imm:$mask, ImmOp:$value),
               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
               [(set regclass:$dest, Int1Regs:$pred,
                     (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ir : NVPTXInst<
               (outs regclass:$dest, Int1Regs:$pred),
               (ins Int32Regs:$mask, ImmOp:$value),
               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
               [(set regclass:$dest, Int1Regs:$pred,
                     (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def ri : NVPTXInst<
               (outs regclass:$dest, Int1Regs:$pred),
               (ins i32imm:$mask, regclass:$value),
               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
               [(set regclass:$dest, Int1Regs:$pred,
                     (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;

  def rr : NVPTXInst<
               (outs regclass:$dest, Int1Regs:$pred),
               (ins Int32Regs:$mask, regclass:$value),
               "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
               [(set regclass:$dest, Int1Regs:$pred,
                     (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
|
|
|
|
// Instantiate MATCH_ALLP_SYNC for 32-bit and 64-bit values.
defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
|
|
|
|
|
2017-09-21 05:23:07 +08:00
|
|
|
} // isConvergent = 1
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
//-----------------------------------
|
|
|
|
// Explicit Memory Fence Functions
|
|
|
|
//-----------------------------------
|
|
|
|
// An instruction with no operands whose asm text is StrOp and whose selection
// pattern is the bare intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;
|
|
|
|
|
|
|
|
// One MEMBAR instruction per membar intrinsic (cta, gl, sys).
def INT_MEMBAR_CTA
    : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL
    : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS
    : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------
|
|
|
|
// Math Functions
|
|
|
|
//-----------------------------------
|
|
|
|
|
|
|
|
// Map min(1.0, max(0.0, x)) to sat(x)
|
2013-06-29 01:58:04 +08:00
|
|
|
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x), because when x
// is NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
|
|
|
|
// Same story for fmax, fmin.
|
|
|
|
|
2013-06-29 01:58:04 +08:00
|
|
|
// f32: fmin(1.0, fmax(0.0, a)) is selected as a saturating CVT_f32_f32.
// Four patterns cover both operand orders of the outer fmin and of the inner
// fmax.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_f
              immFloat1,
              (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_f
              immFloat1,
              (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_f
              (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
              immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_f
              (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
              immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
|
|
|
|
|
|
|
|
// f64: fmin(1.0, fmax(0.0, a)) is selected as a saturating CVT_f64_f64.
// Four patterns cover both operand orders of the outer fmin and of the inner
// fmax.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_d
              immDouble1,
              (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_d
              immDouble1,
              (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_d
              (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
              immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_d
              (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
              immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
|
|
|
|
// One-source intrinsic instruction: $dst (target_regclass) is set from IntOP
// applied to $src0 (src_regclass).
//
// OpcStr is the complete asm string rather than just a mnemonic, because we
// need to deal with cases like INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
    : NVPTXInst<
          (outs target_regclass:$dst),
          (ins src_regclass:$src0),
          OpcStr,
          [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
|
|
|
|
|
|
|
|
// Two-source intrinsic instruction: $dst (t_regclass) is set from IntOP
// applied to $src0 (s0_regclass) and $src1 (s1_regclass).
//
// OpcStr is the complete asm string rather than just a mnemonic, because we
// need to deal with cases like INT_PTX_NATIVE_POWR_F.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
    : NVPTXInst<
          (outs t_regclass:$dst),
          (ins s0_regclass:$src0, s1_regclass:$src1),
          OpcStr,
          [(set t_regclass:$dst,
                (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
|
|
|
|
|
|
|
|
// Three-source intrinsic instruction: $dst (t_regclass) is set from IntOP
// applied to $src0, $src1 and $src2 (s0/s1/s2_regclass respectively). OpcStr
// is the complete asm string.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
    : NVPTXInst<
          (outs t_regclass:$dst),
          (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
          OpcStr,
          [(set t_regclass:$dst,
                (IntOP s0_regclass:$src0, s1_regclass:$src1,
                       s2_regclass:$src2))]>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// MISC
|
|
|
|
//
|
|
|
|
|
|
|
|
// prmt.b32 on three 32-bit register sources, selected for int_nvvm_prmt.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Min Max
|
|
|
|
//
|
|
|
|
|
|
|
|
// Floating-point min/max intrinsics. Each def binds one int_nvvm_fmin_* /
// int_nvvm_fmax_* intrinsic to the PTX instruction in its asm string.

// f32 min
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

// f32 max
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min / max
def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
|
|
|
|
|
[NVPTX] Auto-upgrade some NVPTX intrinsics to LLVM target-generic code.
Summary:
Specifically, we upgrade llvm.nvvm.:
* brev{32,64}
* clz.{i,ll}
* popc.{i,ll}
* abs.{i,ll}
* {min,max}.{i,ll,u,ull}
* h2f
These either map directly to an existing LLVM target-generic
intrinsic or map to a simple LLVM target-generic idiom.
In all cases, we check that the code we generate is lowered to PTX as we
expect.
These builtins don't need to be backfilled in clang: They're not
accessible to user code from nvcc.
Reviewers: tra
Subscribers: majnemer, cfe-commits, llvm-commits, jholewinski
Differential Revision: https://reviews.llvm.org/D28793
llvm-svn: 292694
2017-01-21 09:00:32 +08:00
|
|
|
|
2012-05-05 04:18:50 +08:00
|
|
|
//
|
|
|
|
// Multiplication
|
|
|
|
//
|
|
|
|
|
|
|
|
// mul.hi intrinsics: signed/unsigned, on 32-bit and 64-bit registers.
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
|
|
|
|
|
|
|
|
// Floating-point multiply intrinsics. Each def binds one int_nvvm_mul_*
// intrinsic to the PTX mul instruction whose modifiers (rn/rz/rm/rp, optional
// ftz) match the intrinsic's suffix.

// f32
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
|
|
|
|
|
|
|
|
// mul24.lo intrinsics, signed and unsigned, on 32-bit registers.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Div
|
|
|
|
//
|
|
|
|
|
|
|
|
// f32 divide intrinsics. Each def binds one int_nvvm_div_* intrinsic to the
// PTX div instruction whose modifiers (approx/rn/rz/rm/rp, optional ftz) match
// the intrinsic's suffix.

// approx
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// rn / rz / rm / rp
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
|
|
|
|
|
|
|
|
// f64 divide intrinsics, one per rn/rz/rm/rp modifier.
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Sad
|
|
|
|
//
|
|
|
|
|
|
|
|
// sad intrinsics, signed and unsigned, on three 32-bit register sources.
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Floor Ceil
|
|
|
|
//
|
|
|
|
|
2013-06-29 01:58:04 +08:00
|
|
|
// floor intrinsics are selected as a same-type CVT using the CvtRMI mode
// (CvtRMI_FTZ for the ftz variant).
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
2013-06-29 01:58:04 +08:00
|
|
|
// ceil intrinsics are selected as a same-type CVT using the CvtRPI mode
// (CvtRPI_FTZ for the ftz variant).
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
//
|
|
|
|
// Abs
|
|
|
|
//
|
|
|
|
|
|
|
|
// Floating-point abs intrinsics: f32 (with and without ftz) and f64.
def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Round
|
|
|
|
//
|
|
|
|
|
2013-06-29 01:58:04 +08:00
|
|
|
// round intrinsics are selected as a same-type CVT using the CvtRNI mode
// (CvtRNI_FTZ for the ftz variant).
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
//
|
|
|
|
// Trunc
|
|
|
|
//
|
|
|
|
|
2013-06-29 01:58:04 +08:00
|
|
|
// trunc intrinsics are selected as a same-type CVT using the CvtRZI mode
// (CvtRZI_FTZ for the ftz variant).
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
//
|
|
|
|
// Saturate
|
|
|
|
//
|
|
|
|
|
2013-06-29 01:58:04 +08:00
|
|
|
// saturate intrinsics are selected as a same-type CVT using the CvtSAT mode
// (CvtSAT_FTZ for the ftz variant).
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
//
|
|
|
|
// Exp2 Log2
|
|
|
|
//
|
|
|
|
|
|
|
|
// ex2.approx / lg2.approx intrinsics: f32 (with and without ftz) and f64.

// ex2
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2
def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Sin Cos
|
|
|
|
//
|
|
|
|
|
|
|
|
// sin.approx / cos.approx intrinsics on f32, with and without ftz.

// sin
def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

// cos
def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Fma
|
|
|
|
//
|
|
|
|
|
|
|
|
// f32 fma intrinsics. Each def binds one int_nvvm_fma_* intrinsic to the PTX
// fma instruction whose modifiers (rn/rz/rm/rp, optional ftz) match the
// intrinsic's suffix.
def INT_NVVM_FMA_RN_FTZ_F
    : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
    : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
    : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
    : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
    : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
    : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
    : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
    : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_f>;
|
|
|
|
|
|
|
|
def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
|
|
|
|
Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
|
|
|
|
def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
|
|
|
|
Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
|
|
|
|
def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
|
|
|
|
Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
|
|
|
|
def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
|
|
|
|
Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Rcp
|
|
|
|
//
|
|
|
|
|
|
|
|
// rcp records: f32 (rn|rz|rm|rp, each with and without ".ftz"), f64
// (rn|rz|rm|rp), and a single "rcp.approx.ftz.f64" entry. Each binds its
// assembly string to the like-named int_nvvm_rcp_* intrinsic via F_MATH_1.
def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Sqrt
|
|
|
|
//
|
|
|
|
|
|
|
|
// sqrt records: f32 (rn|rz|rm|rp and approx, each with and without ".ftz")
// followed by f64 (rn|rz|rm|rp). Each binds its assembly string to the
// like-named int_nvvm_sqrt_* intrinsic via F_MATH_1.
def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
    : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
    : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
    : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

def INT_NVVM_SQRT_RN_D
    : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
    : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
    : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
    : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
|
|
|
|
|
2013-05-22 00:51:30 +08:00
|
|
|
// nvvm_sqrt intrinsic
|
|
|
|
// Selection patterns for the generic int_nvvm_sqrt_f intrinsic. The same
// source pattern is listed four times; the Requires<> predicate lists choose
// between the rn/approx and ftz/non-ftz instructions, and the final entry
// carries no predicate at all. The original ordering is kept.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
|
|
|
|
|
2012-05-05 04:18:50 +08:00
|
|
|
//
|
|
|
|
// Rsqrt
|
|
|
|
//
|
|
|
|
|
|
|
|
// rsqrt.approx records (f32 with/without ".ftz", and f64), each bound to the
// like-named int_nvvm_rsqrt_approx_* intrinsic via F_MATH_1.
def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
    : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
    : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Add
|
|
|
|
//
|
|
|
|
|
|
|
|
// add records: f32 (rn|rz|rm|rp, each with and without ".ftz") followed by
// f64 (rn|rz|rm|rp). Each passes a two-source assembly string, three register
// classes and the like-named int_nvvm_add_* intrinsic to F_MATH_2.
// NOTE(review): F_MATH_2 is declared outside this excerpt.
def INT_NVVM_ADD_RN_FTZ_F
    : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
    : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
    : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
    : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
    : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
    : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
    : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
    : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

def INT_NVVM_ADD_RN_D
    : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
    : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
    : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
    : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
|
|
|
|
|
|
|
|
//
|
|
|
|
// Convert
|
|
|
|
//
|
|
|
|
|
2013-06-29 01:58:04 +08:00
|
|
|
// int_nvvm_d2f_*: each intrinsic is rewritten to CVT_f32_f64 with the Cvt*
// mode operand whose name mirrors the intrinsic suffix.
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
|
|
|
|
|
|
|
|
// int_nvvm_d2i_* -> CVT_s32_f64 and int_nvvm_d2ui_* -> CVT_u32_f64, using the
// Cvt*I mode operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
|
|
|
|
|
|
|
|
// int_nvvm_i2d_* -> CVT_f64_s32 and int_nvvm_ui2d_* -> CVT_f64_u32, using the
// Cvt* mode operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
|
|
|
|
|
|
|
|
// int_nvvm_f2i_*: rewritten to CVT_s32_f32 with the Cvt*I / Cvt*I_FTZ mode
// operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
|
|
|
|
|
|
|
|
// int_nvvm_f2ui_*: rewritten to CVT_u32_f32 with the Cvt*I / Cvt*I_FTZ mode
// operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
|
|
|
|
|
|
|
|
// int_nvvm_i2f_* -> CVT_f32_s32 and int_nvvm_ui2f_* -> CVT_f32_u32, using the
// Cvt* mode operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// int_nvvm_lohi_i2d: a single mov.b64 whose source is the brace-enclosed
// pair ($src0, $src1) of 32-bit registers and whose result is a Float64Regs
// register. The doubled braces are how this file writes literal braces in an
// assembly string.
def INT_NVVM_LOHI_I2D
    : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
               Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
|
|
|
|
|
2017-01-18 08:09:19 +08:00
|
|
|
// int_nvvm_d2i_lo: inside a scoped block, declares a scratch .b32 register
// %temp and unpacks the 64-bit source with mov.b64 into {$dst, %temp}; $dst
// is the first element of the pair and %temp is discarded.
def INT_NVVM_D2I_LO
    : F_MATH_1<!strconcat("{{\n\t",
                          ".reg .b32 %temp; \n\t",
                          "mov.b64 \t{$dst, %temp}, $src0;\n\t",
                          "}}"),
               Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
|
|
|
|
// int_nvvm_d2i_hi: inside a scoped block, declares a scratch .b32 register
// %temp and unpacks the 64-bit source with mov.b64 into {%temp, $dst}; $dst
// is the second element of the pair and %temp is discarded.
def INT_NVVM_D2I_HI
    : F_MATH_1<!strconcat("{{\n\t",
                          ".reg .b32 %temp; \n\t",
                          "mov.b64 \t{%temp, $dst}, $src0;\n\t",
                          "}}"),
               Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
2013-06-29 01:58:04 +08:00
|
|
|
// int_nvvm_f2ll_*: rewritten to CVT_s64_f32 with the Cvt*I / Cvt*I_FTZ mode
// operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
|
|
|
|
|
|
|
|
// int_nvvm_f2ull_*: rewritten to CVT_u64_f32 with the Cvt*I / Cvt*I_FTZ mode
// operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
|
|
|
|
|
|
|
|
// int_nvvm_d2ll_* -> CVT_s64_f64 and int_nvvm_d2ull_* -> CVT_u64_f64, using
// the Cvt*I mode operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
|
|
|
|
|
|
|
|
// int_nvvm_ll2f_* -> CVT_f32_s64 and int_nvvm_ull2f_* -> CVT_f32_u64, using
// the Cvt* mode operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
|
|
|
|
|
|
|
|
// int_nvvm_ll2d_* -> CVT_f64_s64 and int_nvvm_ull2d_* -> CVT_f64_u64, using
// the Cvt* mode operand that matches the intrinsic suffix.
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
|
|
|
|
|
|
|
|
|
2017-01-14 04:56:17 +08:00
|
|
|
// int_nvvm_f2h_rn[_ftz]: converted with CVT_f16_f32 (CvtRN_FTZ / CvtRN) and
// the result is then passed through BITCONVERT_16_F2I.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (BITCONVERT_16_F2I
              (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (BITCONVERT_16_F2I
              (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
|
|
|
|
|
2012-05-05 04:18:50 +08:00
|
|
|
//
|
|
|
|
// Bitcast
|
|
|
|
//
|
|
|
|
|
|
|
|
// Bitcast intrinsics: each is a plain mov.b32 / mov.b64 between an integer
// and a floating-point register class of the same width.
def INT_NVVM_BITCAST_F2I
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
|
|
|
|
|
2017-12-07 01:50:05 +08:00
|
|
|
//
|
|
|
|
// FNS
|
|
|
|
//
|
|
|
|
|
|
|
|
// Base class for the fns.b32 instruction variants.
//   ins      - input operand dag; it must name $mask, $base and $offset
//              because the assembly string refers to them.
//   Operands - the dag whose value is set into $dst by the selection pattern.
// The result is always an Int32Regs register, and every variant is guarded
// by the hasPTX60 and hasSM30 predicates.
class INT_FNS_MBO<dag ins, dag Operands>
    : NVPTXInst<(outs Int32Regs:$dst), ins,
                "fns.b32 \t$dst, $mask, $base, $offset;",
                [(set Int32Regs:$dst, Operands )]>,
      Requires<[hasPTX60, hasSM30]>;
|
|
|
|
|
|
|
|
// All eight register/immediate combinations of (mask, base, offset) for
// int_nvvm_fns. The three-letter suffix spells the operand kinds in that
// order: 'r' = Int32Regs register, 'i' = i32imm immediate.
def INT_FNS_rrr
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                Int32Regs:$offset)>;
def INT_FNS_rri
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                imm:$offset)>;
def INT_FNS_rir
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns Int32Regs:$mask, imm:$base,
                                Int32Regs:$offset)>;
def INT_FNS_rii
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, Int32Regs:$base,
                                Int32Regs:$offset)>;
def INT_FNS_iri
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
|
|
|
|
|
2012-05-05 04:18:50 +08:00
|
|
|
//-----------------------------------
|
|
|
|
// Atomic Functions
|
|
|
|
//-----------------------------------
|
|
|
|
|
|
|
|
// PatFrag wrapper whose predicate is the result of
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL).
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
    : PatFrag<ops, frag, [{
        return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
      }]>;
|
|
|
|
// PatFrag wrapper whose predicate is the result of
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED).
class ATOMIC_SHARED_CHK <dag ops, dag frag>
    : PatFrag<ops, frag, [{
        return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
      }]>;
|
|
|
|
// PatFrag wrapper whose predicate is the result of
// ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC).
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
    : PatFrag<ops, frag, [{
        return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
      }]>;
|
|
|
|
|
|
|
|
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrclass - register class of the $addr operand
//   regclass - register class of $dst and of the register form of $b
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom", in that order,
//              to form the mnemonic
//   IntOp    - PatFrag matched by the selection pattern
//   IMMType / IMM - operand type and SDNode used by the immediate form of $b
//   Pred     - predicate list attached through Requires<>
// Emits two records: "reg" ($b in a register) and "imm" ($b immediate).
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  def reg
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b),
                  !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                             " \t$dst, [$addr], $b;"),
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, regclass:$b))]>,
        Requires<Pred>;
  def imm
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, IMMType:$b),
                  !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                             " \t$dst, [$addr], $b;"),
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, IMM:$b))]>,
        Requires<Pred>;
}
|
|
|
|
// Instantiates F_ATOMIC_2_imp twice, once with Int32Regs and once with
// Int64Regs as the address register class (sub-names "p32" and "p64").
// Pred defaults to the empty list; all other arguments are forwarded as-is.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr,
                      string TypeStr, string OpcStr, PatFrag IntOp,
                      Operand IMMType, SDNode IMM,
                      list<Predicate> Pred = []> {
  defm p32
      : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                       IntOp, IMMType, IMM, Pred>;
  defm p64
      : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                       IntOp, IMMType, IMM, Pred>;
}
|
|
|
|
|
|
|
|
// Two-operand atomic; the second operand is negated before the atomic op.
|
|
|
|
// Two-operand atomic that negates its value operand first. The emitted
// assembly is a scoped block that declares a ".s<TypeStr>" register "temp",
// writes neg.s<TypeStr> of $b into it, and then issues
// "atom<SpaceStr><OpcStr>.u<TypeStr>" with temp as the value operand.
// Only a register form ("reg") is defined; IMMType is accepted but not
// referenced anywhere in this body.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, Operand IMMType,
                              list<Predicate> Pred> {
  def reg
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b),
                  !strconcat(
                    "{{ \n\t",
                    ".reg \t.s", TypeStr, " temp; \n\t",
                    "neg.s", TypeStr, " \ttemp, $b; \n\t",
                    "atom", SpaceStr, OpcStr, ".u", TypeStr,
                    " \t$dst, [$addr], temp; \n\t",
                    "}}"),
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, regclass:$b))]>,
        Requires<Pred>;
}
|
|
|
|
// Instantiates F_ATOMIC_2_NEG_imp twice, once with Int32Regs and once with
// Int64Regs as the address register class (sub-names "p32" and "p64").
// Pred defaults to the empty list; all other arguments are forwarded as-is.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          Operand IMMType, list<Predicate> Pred = []> {
  defm p32
      : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                           IntOp, IMMType, Pred>;
  defm p64
      : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                           IntOp, IMMType, Pred>;
}
|
|
|
|
|
|
|
|
// Three-operand atomic: an address plus two value operands.
|
|
|
|
// Three-operand atomic (address + two values, $b and $c) for a single
// pointer width. The mnemonic is "atom" followed by SpaceStr, OpcStr and
// TypeStr. Four records cover every register/immediate mix of $b and $c:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands are declared with IMMType and matched with the plain
// `imm` node in the patterns.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b, regclass:$c),
                  !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                             " \t$dst, [$addr], $b, $c;"),
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
        Requires<Pred>;

  def imm1
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                  !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                             " \t$dst, [$addr], $b, $c;"),
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
        Requires<Pred>;

  def imm2
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                  !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                             " \t$dst, [$addr], $b, $c;"),
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
        Requires<Pred>;

  def imm3
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                  !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                             " \t$dst, [$addr], $b, $c;"),
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
        Requires<Pred>;
}
|
|
|
|
// Instantiates F_ATOMIC_3_imp twice, once with Int32Regs and once with
// Int64Regs as the address register class (sub-names "p32" and "p64").
// Pred defaults to the empty list; all other arguments are forwarded as-is.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr,
                      string TypeStr, string OpcStr, PatFrag IntOp,
                      Operand IMMType, list<Predicate> Pred = []> {
  defm p32
      : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                       IntOp, IMMType, Pred>;
  defm p64
      : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                       IntOp, IMMType, Pred>;
}
|
|
|
|
|
|
|
|
// atom_add
|
|
|
|
|
|
|
|
// Address-space-filtered fragments for atomic add. The suffix names the
// ATOMIC_*_CHK wrapper applied: _g = GLOBAL, _s = SHARED, _gen = GENERIC.
// The 32/64 variants wrap atomic_load_add_{32,64}; the f32/f64 variants wrap
// the int_nvvm_atomic_load_add_{f32,f64} intrinsics.
def atomic_load_add_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_f32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
def atomic_load_add_f64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
def atomic_load_add_f64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
def atomic_load_add_f64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// Integer atom.add instantiations (".u32" / ".u64") of F_ATOMIC_2.
// NOTE(review): each *_GEN_*_USE_G entry matches the same *_gen fragment as
// the plain *_GEN_* entry but emits the ".global" space string, and no
// predicate visible here separates the two -- confirm how selection is
// meant to choose between them.
defm INT_PTX_ATOM_ADD_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                 atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
                 atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
                 atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                 atomic_load_add_32_gen, i32imm, imm>;

defm INT_PTX_ATOM_ADD_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                 atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
                 atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
                 atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                 atomic_load_add_64_gen, i64imm, imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// f32 atom.add instantiations of F_ATOMIC_2 for the global, shared and
// generic fragments; the immediate form uses f32imm / fpimm.
defm INT_PTX_ATOM_ADD_G_F32
    : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
                 atomic_load_add_f32_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
    : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
                 atomic_load_add_f32_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
    : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
                 atomic_load_add_f32_gen, f32imm, fpimm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
2017-11-08 06:10:54 +08:00
|
|
|
// f64 atom.add instantiations of F_ATOMIC_2 for the global, shared and
// generic fragments; the immediate form uses f64imm / fpimm, and every
// record is guarded by the hasAtomAddF64 predicate.
defm INT_PTX_ATOM_ADD_G_F64
    : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
                 atomic_load_add_f64_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
    : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
                 atomic_load_add_f64_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
    : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
                 atomic_load_add_f64_gen, f64imm, fpimm, [hasAtomAddF64]>;
|
2017-11-08 06:10:54 +08:00
|
|
|
|
2012-05-05 04:18:50 +08:00
|
|
|
// atom_sub
|
|
|
|
|
|
|
|
// Address-space-filtered fragments for atomic subtract. The suffix names the
// ATOMIC_*_CHK wrapper applied: _g = GLOBAL, _s = SHARED, _gen = GENERIC.
def atomic_load_sub_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_sub_64 node:$a, node:$b)>;
|
|
|
|
|
|
|
|
// Atomic subtract is emitted through F_ATOMIC_2_NEG with the ".add" opcode
// string, i.e. the value operand is negated and then added. TypeStr is the
// bare width ("32" / "64") because F_ATOMIC_2_NEG_imp prepends ".s" / ".u".
// NOTE(review): each *_GEN_*_USE_G entry matches the same *_gen fragment as
// the plain *_GEN_* entry but emits the ".global" space string, and no
// predicate visible here separates the two -- confirm intended selection.
defm INT_PTX_ATOM_SUB_G_32
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_g, i32imm>;
defm INT_PTX_ATOM_SUB_G_64
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_g, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_32
    : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                     atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_S_32
    : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                     atomic_load_sub_32_s, i32imm>;
defm INT_PTX_ATOM_SUB_S_64
    : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                     atomic_load_sub_64_s, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64
    : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                     atomic_load_sub_64_gen, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_gen, i64imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_swap
|
|
|
|
|
|
|
|
// PatFrags for atomic_swap, 32- and 64-bit.  The _g/_s/_gen suffix picks
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK (declared outside
// this section).
def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
|
|
|
|
|
|
|
|
// atom_swap instruction definitions: ".exch" on ".b32"/".b64" for the
// ".global", ".shared" and generic ("") space strings.  The *_USE_G variants
// pair the generic (_gen) PatFrag with the ".global" space string.
// F_ATOMIC_2 is declared outside this section.
defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
  atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
  atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
  atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
  atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
  atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
  atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".exch", atomic_swap_64_gen, i64imm, imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_max
|
|
|
|
|
|
|
|
// PatFrags for signed (atomic_load_max_*) and unsigned (atomic_load_umax_*)
// atomic max, 32- and 64-bit.  The _g/_s/_gen suffix picks ATOMIC_GLOBAL_CHK /
// ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK (declared outside this section).
def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_max instruction definitions.  Signed variants use the ".s32"/".s64"
// type strings with the atomic_load_max_* PatFrags; unsigned variants use
// ".u32"/".u64" with atomic_load_umax_*.  All pass ".max" as the opcode
// string.  The *_USE_G variants pair the generic (_gen) PatFrag with the
// ".global" space string.  F_ATOMIC_2 is declared outside this section.
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".max", atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".max", atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
  atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  ".max", atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  ".max", atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
  atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".max", atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
  atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".max", atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  ".max", atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
  atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_min
|
|
|
|
|
|
|
|
// PatFrags for signed (atomic_load_min_*) and unsigned (atomic_load_umin_*)
// atomic min, 32- and 64-bit.  The _g/_s/_gen suffix picks ATOMIC_GLOBAL_CHK /
// ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK (declared outside this section).
def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_min instruction definitions.  Signed variants use the ".s32"/".s64"
// type strings with the atomic_load_min_* PatFrags; unsigned variants use
// ".u32"/".u64" with atomic_load_umin_*.  All pass ".min" as the opcode
// string.  The *_USE_G variants pair the generic (_gen) PatFrag with the
// ".global" space string.  F_ATOMIC_2 is declared outside this section.
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".min", atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".min", atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
  atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  ".min", atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  ".min", atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
  atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".min", atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
  atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".min", atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  ".min", atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
  atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_inc atom_dec
|
|
|
|
|
|
|
|
// PatFrags for atomic inc/dec.  Unlike the other atomics in this file these
// wrap the int_nvvm_atomic_load_{inc,dec}_32 intrinsics rather than an
// atomic_load_* node, and only 32-bit forms are defined here.  The _g/_s/_gen
// suffix picks ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK
// (declared outside this section).
def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
|
|
|
|
|
|
|
|
// atom_inc / atom_dec instruction definitions: ".inc" and ".dec" on ".u32"
// for the ".global", ".shared" and generic ("") space strings.  The *_USE_G
// variants pair the generic (_gen) PatFrag with the ".global" space string.
// F_ATOMIC_2 is declared outside this section.
defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
  atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
  atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
  atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".inc", atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
  atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
  atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
  atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".dec", atomic_load_dec_32_gen, i32imm, imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_and
|
|
|
|
|
|
|
|
// PatFrags for atomic_load_and, 32- and 64-bit.  The _g/_s/_gen suffix picks
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK (declared outside
// this section).
def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_and instruction definitions: ".and" on ".b32"/".b64" for the
// ".global", ".shared" and generic ("") space strings.  The *_USE_G variants
// pair the generic (_gen) PatFrag with the ".global" space string.
// F_ATOMIC_2 is declared outside this section.
defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
  atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
  atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
  atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".and", atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
  atomic_load_and_64_g, i64imm, imm>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
  atomic_load_and_64_s, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
  atomic_load_and_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".and", atomic_load_and_64_gen, i64imm, imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_or
|
|
|
|
|
|
|
|
// PatFrags for atomic_load_or, 32- and 64-bit.  The _g/_s/_gen suffix picks
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK (declared outside
// this section).
def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_or instruction definitions: ".or" on ".b32"/".b64" for the ".global",
// ".shared" and generic ("") space strings.  The *_USE_G variants pair the
// generic (_gen) PatFrag with the ".global" space string.  Definition order
// (G, GEN, GEN_USE_G, S) differs from the sibling sections and is kept as-is.
// F_ATOMIC_2 is declared outside this section.
defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
  atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
  atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".or", atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
  atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
  atomic_load_or_64_g, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
  atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".or", atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
  atomic_load_or_64_s, i64imm, imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_xor
|
|
|
|
|
|
|
|
// PatFrags for atomic_load_xor, 32- and 64-bit.  The _g/_s/_gen suffix picks
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK (declared outside
// this section).
def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_xor instruction definitions: ".xor" on ".b32"/".b64" for the
// ".global", ".shared" and generic ("") space strings.  The *_USE_G variants
// pair the generic (_gen) PatFrag with the ".global" space string.
// F_ATOMIC_2 is declared outside this section.
defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
  atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
  atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
  atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".xor", atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
  atomic_load_xor_64_g, i64imm, imm>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
  atomic_load_xor_64_s, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
  atomic_load_xor_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".xor", atomic_load_xor_64_gen, i64imm, imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// atom_cas
|
|
|
|
|
|
|
|
// PatFrags for atomic_cmp_swap, 32- and 64-bit.  These take three operands
// ($a, $b, $c) instead of two.  The _g/_s/_gen suffix picks ATOMIC_GLOBAL_CHK /
// ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK (declared outside this section).
def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
|
|
|
|
|
|
|
|
// atom_cas instruction definitions: ".cas" on ".b32"/".b64" for the
// ".global", ".shared" and generic ("") space strings, built with F_ATOMIC_3
// (declared outside this section).  The *_USE_G variants pair the generic
// (_gen) PatFrag with the ".global" space string.
defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
  atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
  atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
  atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
  ".cas", atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
  atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
  atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
  atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
  ".cas", atomic_cmp_swap_64_gen, i64imm>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
2016-09-29 01:25:38 +08:00
|
|
|
// Support for scoped atomic operations.  Matches
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented.
//  'space' is limited to generic as it's the only one needed to support CUDA.
//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
|
|
|
|
// Common base for the scoped-atomic instruction records below.
//   AsmStr   - assembly string for the instruction.
//   regclass - register class of the single $result output.
//   Preds    - predicates forwarded to Requires<>.
//   ins      - input operand dag.
//   Operands - dag whose value is assigned to $result in the selection
//              pattern.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
      : NVPTXInst<(outs regclass:$result), ins,
                  AsmStr,
                  [(set regclass:$result, Operands)]>,
        Requires<Preds>;
|
|
|
|
|
|
|
|
// Define instruction variants for all addressing modes.
|
|
|
|
// Two-operand scoped atomic: emits four anonymous ATOM23_impl records covering
// {Int32Regs, Int64Regs} address registers x {register, immediate} for $b.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register-operand variants carry AddedComplexity = 1; the immediate
  // variants further down are left at the default.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr Int64Regs:$src, regclass:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}
|
|
|
|
|
|
|
|
// Three-operand scoped atomic: emits eight anonymous ATOM23_impl records
// covering {Int32Regs, Int64Regs} address registers x every register/immediate
// combination of $b and $c.  AddedComplexity is 2 for reg/reg, 1 for the two
// mixed forms, and the default for imm/imm.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Variants for register/immediate permutations of $b and $c
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
|
|
|
|
|
|
|
|
// Constructs intrinsic name and instruction asm strings.
|
|
|
|
// Builds the asm string and looks up the intrinsic for a two-operand scoped
// atomic, then instantiates ATOM2P_impl with them.
//   Asm string:  "atom", then "." # SpaceStr unless SpaceStr is "gen", then
//                "." # ScopeStr unless ScopeStr is "gpu", then
//                "." # OpStr # "." # TypeStr and the operand list.
//   Intrinsic:   int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>, followed by
//                "_" # ScopeStr unless ScopeStr is empty.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
|
|
|
|
// Three-operand counterpart of ATOM2N_impl: same asm-string and intrinsic-name
// construction, but the operand list is "$result, [$src], $b, $c" and the
// result is handed to ATOM3P_impl.
//   Asm string:  "atom", then "." # SpaceStr unless SpaceStr is "gen", then
//                "." # ScopeStr unless ScopeStr is "gpu", then
//                "." # OpStr # "." # TypeStr and the operand list.
//   Intrinsic:   int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>, followed by
//                "_" # ScopeStr unless ScopeStr is empty.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b, $c;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
|
|
|
|
|
|
|
|
// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
|
|
|
|
// Address-space fan-out for two-operand scoped atomics.  Only one variant is
// instantiated: ATOM2N_impl with SpaceStr = "gen"; all other arguments are
// forwarded unchanged.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}
|
|
|
|
// Address-space fan-out for three-operand scoped atomics.  Only one variant is
// instantiated: ATOM3N_impl with SpaceStr = "gen"; all other arguments are
// forwarded unchanged.
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}
|
|
|
|
|
|
|
|
// Constructs variants for different scopes of atomic op.
|
|
|
|
// Scope fan-out for two-operand atomics: instantiates ATOM2A_impl once with
// ScopeStr = "cta" and once with "sys".  Both append hasAtomScope to the
// caller-supplied predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // .gpu scope is default and is currently covered by existing
  // atomics w/o explicitly specified scope.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds,[hasAtomScope])>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds,[hasAtomScope])>;
}
|
|
|
|
// Scope fan-out for three-operand atomics: instantiates ATOM3A_impl once with
// ScopeStr = "cta" and once with "sys".  Both append hasAtomScope to the
// caller-supplied predicate list.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
           NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
           list<Predicate> Preds> {
  // No need to define ".gpu"-scoped atomics.  They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds,[hasAtomScope])>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds,[hasAtomScope])>;
}
|
2012-05-05 04:18:50 +08:00
|
|
|
|
2016-09-29 01:25:38 +08:00
|
|
|
// atom.add
|
|
|
|
// Type fan-out for scoped atom.add: s32, u32, u64, f32 and f64.  The integer
// variants all pass "i" as the intrinsic type string and the float variants
// pass "f"; only the f64 variant adds a predicate (hasAtomAddF64).
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
                          []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
                          [hasAtomAddF64]>;
}
|
|
|
|
|
|
|
|
// atom.{and,or,xor}
|
|
|
|
// Type fan-out for scoped atom.{and,or,xor}: b32 and b64.  The b64 variant is
// additionally gated on hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
                          [hasAtomBitwise64]>;
}
|
|
|
|
|
|
|
|
// atom.exch
|
|
|
|
// Type fan-out for scoped atom.exch: b32 and b64, neither with extra
// predicates.
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}
|
|
|
|
|
|
|
|
// atom.{min,max}
|
|
|
|
// Type fan-out for scoped atom.{min,max}: s32, u32, s64 and u64.  The 64-bit
// variants are gated on hasAtomMinMax64.
// NOTE(review): the signed and unsigned variants of each width pass the same
// intrinsic type string ("i") and register class, so they resolve to the same
// intrinsic name in ATOM2N_impl -- confirm that this is intended.
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
}
|
|
|
|
|
|
|
|
// atom.{inc,dec}
// Only a u32 form is defined for inc/dec.
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
}
|
|
|
|
|
|
|
|
// atom.cas
// Compare-and-swap on b32/b64 operands, built on the three-operand
// ATOM3S_impl helper rather than ATOM2S_impl.
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}
|
2012-05-05 04:18:50 +08:00
|
|
|
|
2016-09-29 01:25:38 +08:00
|
|
|
// Instantiate one INT_PTX_SATOM_<OP> family per atomic operation. The string
// argument is the operation name passed down as OpStr.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
//-----------------------------------
|
|
|
|
// Support for ldu on sm_20 or later
|
|
|
|
//-----------------------------------
|
|
|
|
|
2016-03-02 03:44:22 +08:00
|
|
|
// Don't annotate ldu instructions as mayLoad, as they load from memory that is
|
|
|
|
// read-only in a kernel.
|
|
|
|
|
2012-05-05 04:18:50 +08:00
|
|
|
// Scalar
|
2016-03-02 03:44:22 +08:00
|
|
|
|
2014-06-28 02:35:51 +08:00
|
|
|
// Scalar ldu.global. TyStr supplies the type suffix together with the operand
// text, and is appended to "ldu.global.". One def is emitted per form of the
// $src operand; none carries a selection pattern, and all require hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  // $src in a 32-bit register.
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  // $src in a 64-bit register.
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  // $src as an imemAny operand.
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  // $src as a MEMri operand.
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  // $src as a MEMri64 operand.
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
}
|
|
|
|
|
2014-06-28 02:35:51 +08:00
|
|
|
// Scalar ldu.global instantiations. Note that i8 results live in Int16Regs,
// f16/f16x2 are loaded as untyped b16/b32, and the p32/p64 pointer forms
// reuse the u32/u64 strings.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
  : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
  : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
// vector
|
|
|
|
|
|
|
|
// Elementized vector ldu
// Two-element form: each def produces $dst1/$dst2 in `regclass` and differs
// only in the kind of $src operand. No selection patterns are attached.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src),
                          "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src),
                          "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src),
                          "ldu.global." # TyStr, []>;
}
|
|
|
|
|
2013-07-01 20:58:52 +08:00
|
|
|
// Four-element elementized ldu.global: each def produces $dst1..$dst4 in
// `regclass` and differs only in the kind of $src operand. No selection
// patterns are attached.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src),
                          "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src),
                          "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src),
                          "ldu.global." # TyStr, []>;
}
|
|
|
|
|
|
|
|
// Elementized vector ldu.global instantiations.
// v2 forms.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
// v4 forms.
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
|
|
|
|
|
2013-02-12 22:18:49 +08:00
|
|
|
|
|
|
|
//-----------------------------------
|
|
|
|
// Support for ldg on sm_35 or later
|
|
|
|
//-----------------------------------
|
|
|
|
|
2016-03-02 03:44:22 +08:00
|
|
|
// Don't annotate ld.global.nc as mayLoad, because these loads go through the
|
|
|
|
// non-coherent texture cache, and therefore the values read must be read-only
|
|
|
|
// during the lifetime of the kernel.
|
|
|
|
|
2014-06-28 02:35:51 +08:00
|
|
|
// Scalar ld.global.nc. TyStr supplies the type suffix together with the
// operand text, and is appended to "ld.global.nc.". One def is emitted per
// form of the $src operand; none carries a selection pattern, and all
// require hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  // $src in a 32-bit register.
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  // $src in a 64-bit register.
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  // $src as an imemAny operand.
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  // $src as a MEMri operand.
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  // $src as a MEMri64 operand.
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
}
|
|
|
|
|
|
|
|
// Scalar ld.global.nc instantiations. Note that i8 results live in Int16Regs,
// f16/f16x2 are loaded as untyped b16/b32, and the p32/p64 pointer forms
// reuse the u32/u64 strings.
defm INT_PTX_LDG_GLOBAL_i8    : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
|
2013-02-12 22:18:49 +08:00
|
|
|
|
|
|
|
// vector
|
|
|
|
|
|
|
|
// Elementized vector ldg
// Two-element form: each def produces $dst1/$dst2 in `regclass` and differs
// only in the kind of $src operand. No selection patterns are attached.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src),
                          "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src),
                          "ld.global.nc." # TyStr, []>;
}
|
|
|
|
|
2013-02-12 22:18:49 +08:00
|
|
|
// Four-element elementized ld.global.nc: each def produces $dst1..$dst4 in
// `regclass` and differs only in the kind of $src operand. No selection
// patterns are attached.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src),
                          "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src),
                          "ld.global.nc." # TyStr, []>;
}
|
|
|
|
|
2013-02-12 22:18:49 +08:00
|
|
|
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Elementized vector ld.global.nc instantiations.
// v2 forms.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE
  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE
  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];",
                  Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];",
                  Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];",
                  Float64Regs>;
// v4 forms.
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE
  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE
  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
|
|
|
|
// Conversions selected for `Intrin` using "cvta.<Str>" (non-generic to
// generic, going by the multiclass name).
//   _yes      : 32-bit source and result.
//   _yes_64   : 64-bit source and result.
//   _yes_6432 : 32-bit source, 64-bit result; only with useShortPtr. The
//               source is first widened into a scratch %tmp register with
//               cvt.u64.u32, then converted with the 64-bit cvta.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           " cvt.u64.u32 \t%tmp, $src;\n\t",
                           " cvta.", Str, ".u64 \t$result, %tmp; }}"),
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}
|
|
|
|
|
|
|
|
// Conversions selected for `Intrin` using "cvta.to.<Str>" (generic to
// non-generic, going by the multiclass name).
//   _yes      : 32-bit source and result.
//   _yes_64   : 64-bit source and result.
//   _yes_3264 : 64-bit source, 32-bit result; only with useShortPtr. The
//               64-bit cvta.to result lands in a scratch %tmp register and is
//               then narrowed with cvt.u32.u64.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           " cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                           " cvt.u32.u64 \t$result, %tmp; }}"),
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}
|
|
|
|
|
|
|
|
// cvta.<space>: one family per address space, bound to the matching
// int_nvvm_ptr_<space>_to_gen intrinsic.
defm cvta_local     : NG_TO_G<"local",  int_nvvm_ptr_local_to_gen>;
defm cvta_shared    : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global    : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const     : NG_TO_G<"const",  int_nvvm_ptr_constant_to_gen>;

// cvta.to.<space>: one family per address space, bound to the matching
// int_nvvm_ptr_gen_to_<space> intrinsic.
defm cvta_to_local  : G_TO_NG<"local",  int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const",  int_nvvm_ptr_gen_to_constant>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
|
|
|
|
// nvvm.ptr.gen.to.param
// Selected as a plain register move of the pointer value (mov.u32 / mov.u64).
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
|
|
|
|
|
|
|
|
|
|
|
|
// nvvm.move intrinsics
|
|
|
|
// Each int_nvvm_move_* intrinsic is selected as a single register-to-register
// mov of the matching width/type.

// Integer moves (untyped .bNN).
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;

// Floating-point moves.
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;

// Pointer moves; both widths match the same int_nvvm_move_ptr intrinsic.
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
|
|
|
|
|
|
|
|
// @TODO: Are these actually needed, or will we always just see symbols
|
|
|
|
// copied to registers first?
|
|
|
|
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
|
|
|
|
"mov.u32 \t$r, $s;",
|
|
|
|
[(set Int32Regs:$r,
|
|
|
|
(int_nvvm_move_ptr texternalsym:$s))]>;
|
|
|
|
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
|
|
|
|
"mov.u64 \t$r, $s;",
|
|
|
|
[(set Int64Regs:$r,
|
|
|
|
(int_nvvm_move_ptr texternalsym:$s))]>;*/
|
|
|
|
|
|
|
|
|
|
|
|
// MoveParam %r1, param
|
|
|
|
// ptr_local_to_gen %r2, %r1
|
|
|
|
// ptr_gen_to_local %r3, %r2
|
|
|
|
// ->
|
|
|
|
// mov %r1, param
|
|
|
|
|
|
|
|
// @TODO: Revisit this. There is a type
|
|
|
|
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
|
|
|
|
// instructions are not currently defined. However, we can use the ptr
|
|
|
|
// variants and the asm printer will do the right thing.
|
|
|
|
// Fold a local->generic->local round trip applied to a MoveParam of an
// external symbol into a single pointer move of that symbol.
// 64-bit result.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
// 32-bit result.
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
|
|
|
|
|
2014-04-09 23:39:15 +08:00
|
|
|
// Moves an imem operand into a 64-bit register with mov.u64. It carries no
// selection pattern.
def texsurf_handles : NVPTXInst<(outs Int64Regs:$result),
                                (ins imem:$src),
                                "mov.u64 \t$result, $src;",
                                []>;
|
2012-05-05 04:18:50 +08:00
|
|
|
|
|
|
|
//-----------------------------------
|
|
|
|
// Compiler Error Warn
|
|
|
|
// - Just ignore them in codegen
|
|
|
|
//-----------------------------------
|
|
|
|
|
|
|
|
// The compiler.warn / compiler.error intrinsics take a 32- or 64-bit register
// operand and are emitted as a PTX comment only.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;
|
|
|
|
|
|
|
|
|
2014-06-28 02:35:24 +08:00
|
|
|
// isspacep
|
|
|
|
|
|
|
|
// isspacep.<space>: writes a predicate ($d, Int1Regs) for a 32- or 64-bit
// address operand $a. Only the .const forms carry a predicate (hasPTX31).

// const
def ISSPACEP_CONST_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_CONST_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
    Requires<[hasPTX31]>;

// global
def ISSPACEP_GLOBAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.global \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.global \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;

// local
def ISSPACEP_LOCAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.local \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.local \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;

// shared
def ISSPACEP_SHARED_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.shared \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.shared \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
|
|
|
|
|
|
|
|
|
2014-06-28 02:35:21 +08:00
|
|
|
// Special register reads
// Copies a SpecialRegs operand into a 32-bit register with mov.b32. It has no
// pattern of its own; standalone Pat records select it.
def MOV_SPECIAL
  : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
              "mov.b32 \t$d, $r;",
              []>;
|
2014-06-28 02:35:21 +08:00
|
|
|
|
|
|
|
// Map each int_nvvm_read_ptx_sreg_envreg<N> intrinsic (N = 0..31) onto a
// MOV_SPECIAL read of the corresponding ENVREG<N> register.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0),  (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1),  (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2),  (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3),  (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4),  (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5),  (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6),  (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7),  (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8),  (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9),  (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
|
|
|
|
|
|
|
|
|
2014-06-28 02:35:33 +08:00
|
|
|
// rotate builtin support
|
|
|
|
|
|
|
|
// 32-bit rotate (int_nvvm_rotate_b32).
// With hasHWROT32 the rotate is a funnel shift of $src with itself.
def ROTATE_B32_HW_IMM
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]>;

def ROTATE_B32_HW_REG
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]>;

// With noHWROT32 the rotate is expanded through the *_sw instructions. The
// immediate form passes both $amt and SUB_FRM_32 applied to $amt.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;

def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
|
|
|
|
|
2016-04-01 09:09:05 +08:00
|
|
|
// Split a 64-bit register with a vector-destination mov.b64. Each instruction
// keeps one element of the {first, second} destination pair in $dst and sends
// the other to a scratch %dummy register declared inside the braces.
let hasSideEffects = 0 in {
  // $dst takes the first element of the pair.
  def GET_LO_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                ".reg .b32 %dummy;\n\t"
                "mov.b64 \t{$dst,%dummy}, $src;\n\t"
                "}}",
                []>;

  // $dst takes the second element of the pair.
  def GET_HI_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                ".reg .b32 %dummy;\n\t"
                "mov.b64 \t{%dummy,$dst}, $src;\n\t"
                "}}",
                []>;
}
|
|
|
|
|
|
|
|
// Build a 64-bit register from two 32-bit registers with mov.b64 {$lo, $hi}.
let hasSideEffects = 0 in
def PACK_TWO_INT32
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int32Regs:$lo, Int32Regs:$hi),
              "mov.b64 \t$dst, {{$lo, $hi}};",
              []>;
|
2014-06-28 02:35:33 +08:00
|
|
|
|
|
|
|
// swap_lo_hi_b64: repack $src with GET_HI_INT64 feeding the $lo operand and
// GET_LO_INT64 feeding the $hi operand of PACK_TWO_INT32.
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32
            (GET_HI_INT64 Int64Regs:$src),
            (GET_LO_INT64 Int64Regs:$src))>;
|
|
|
|
|
2016-04-01 09:09:05 +08:00
|
|
|
// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// no side effects.
// Left/right wrap-mode variants, each with an immediate or a register shift
// amount. None carries a selection pattern; they are referenced from Pat
// records.
let hasSideEffects = 0 in {
  // Left shift.
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;
  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  // Right shift.
  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;
  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;
}
|
2014-06-28 02:35:33 +08:00
|
|
|
|
|
|
|
// HW version of rotate 64
// The 64-bit value is split into halves, each result half is produced by one
// 32-bit funnel shift across the two halves, and the pair is repacked.
// NOTE(review): the shf instructions used here are the .wrap forms; per the
// PTX ISA these presumably use only the low 5 bits of the amount, so amounts
// >= 32 may not yield a true 64-bit rotate -- confirm against callers.

// Rotate left, immediate amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                imm:$amt),
            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate left, register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate right, immediate amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                imm:$amt),
            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate right, register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
|
|
|
|
|
|
|
|
// SW version of rotate 64
// Used when the target lacks the hardware funnel shift (noHWROT32). The
// immediate forms hand ROT64imm_sw the amount and its complement; the
// register forms defer to ROTL64reg_sw / ROTR64reg_sw.

// Rotate left, immediate amount. The complementary amount for a 64-bit rotate
// is taken with SUB_FRM_64, matching the rotate-right immediate pattern
// below (this previously used SUB_FRM_32, the 32-bit rotate's transform).
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;

// Rotate left, register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;

// Rotate right, immediate amount: same instruction with the two amounts
// swapped.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;

// Rotate right, register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
|
|
|
|
|
|
|
|
|
2014-04-09 23:39:15 +08:00
|
|
|
//-----------------------------------
|
|
|
|
// Texture Intrinsics
|
|
|
|
//-----------------------------------
|
|
|
|
|
|
|
|
// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
|
|
|
|
// also defined in NVPTXReplaceImageHandles.cpp
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// texmode_independent
|
|
|
|
let IsTex = 1, IsTexModeUnified = 0 in {
|
2014-04-09 23:39:15 +08:00
|
|
|
// Texture fetch instructions using handles
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.1d`: four f32 results ($r, $g, $b, $a) from one s32 coordinate $x.
// $t/$s are presumably the texture and sampler handles. The pattern list is
// empty, so this def carries no selection pattern of its own.
def TEX_1D_F32_S32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
      "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
      []>;
|
|
|
|
// PTX `tex.1d`: four f32 results ($r, $g, $b, $a) from one f32 coordinate $x.
// $t/$s are presumably the texture and sampler handles. The pattern list is
// empty, so this def carries no selection pattern of its own.
def TEX_1D_F32_F32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
      "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
      []>;
|
|
|
|
// PTX `tex.level.1d`: four f32 results from one f32 coordinate $x, with an
// extra $lod operand printed after the address. $t/$s are presumably the
// texture and sampler handles. Empty pattern list.
def TEX_1D_F32_F32_LEVEL
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
      "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x\\}], $lod;",
      []>;
|
|
|
|
// PTX `tex.grad.1d`: four f32 results from one f32 coordinate $x, with
// $gradx and $grady printed as one-element brace lists after the address.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_F32_F32_GRAD
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$gradx,
           Float32Regs:$grady),
      "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.1d`: four s32 results ($r, $g, $b, $a) from one s32 coordinate $x.
// $t/$s are presumably the texture and sampler handles. The pattern list is
// empty, so this def carries no selection pattern of its own.
def TEX_1D_S32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
      "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.1d`: four s32 results ($r, $g, $b, $a) from one f32 coordinate $x.
// $t/$s are presumably the texture and sampler handles. The pattern list is
// empty, so this def carries no selection pattern of its own.
def TEX_1D_S32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
      "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.1d`: four s32 results from one f32 coordinate $x, with an
// extra $lod operand printed after the address. $t/$s are presumably the
// texture and sampler handles. Empty pattern list.
def TEX_1D_S32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
      "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.grad.1d`: four s32 results from one f32 coordinate $x, with
// $gradx and $grady printed as one-element brace lists after the address.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_S32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$gradx,
           Float32Regs:$grady),
      "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.1d`: four u32 results (held in Int32Regs) from one s32 coordinate
// $x. $t/$s are presumably the texture and sampler handles. The pattern list
// is empty, so this def carries no selection pattern of its own.
def TEX_1D_U32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
      "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
      []>;
|
|
|
|
// PTX `tex.1d`: four u32 results (held in Int32Regs) from one f32 coordinate
// $x. $t/$s are presumably the texture and sampler handles. The pattern list
// is empty, so this def carries no selection pattern of its own.
def TEX_1D_U32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
      "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
      []>;
|
|
|
|
// PTX `tex.level.1d`: four u32 results (held in Int32Regs) from one f32
// coordinate $x, with an extra $lod operand printed after the address.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_U32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
      "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x\\}], $lod;",
      []>;
|
|
|
|
// PTX `tex.grad.1d`: four u32 results (held in Int32Regs) from one f32
// coordinate $x, with $gradx and $grady printed as one-element brace lists.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_U32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$gradx,
           Float32Regs:$grady),
      "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
|
2014-04-09 23:39:15 +08:00
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.a1d`: four f32 results from the pair {$l, $x}, where $x is an s32
// coordinate and $l is presumably the array layer index. $t/$s are presumably
// the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_F32_S32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}];",
      []>;
|
|
|
|
// PTX `tex.a1d`: four f32 results from the pair {$l, $x}, where $x is an f32
// coordinate and $l is presumably the array layer index. $t/$s are presumably
// the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_F32_F32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
      "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}];",
      []>;
|
|
|
|
// PTX `tex.level.a1d`: four f32 results from {$l, $x} (f32 coordinate, $l
// presumably the array layer index), with an extra $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_F32_F32_LEVEL
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$lod),
      "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}], $lod;",
      []>;
|
|
|
|
// PTX `tex.grad.a1d`: four f32 results from {$l, $x} (f32 coordinate, $l
// presumably the array layer index), followed by {$gradx} and {$grady}.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_F32_F32_GRAD
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$gradx, Float32Regs:$grady),
      "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.a1d`: four s32 results from the pair {$l, $x}, where $x is an s32
// coordinate and $l is presumably the array layer index. $t/$s are presumably
// the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_S32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.a1d`: four s32 results from the pair {$l, $x}, where $x is an f32
// coordinate and $l is presumably the array layer index. $t/$s are presumably
// the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_S32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
      "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.a1d`: four s32 results from {$l, $x} (f32 coordinate, $l
// presumably the array layer index), with an extra $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_S32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$lod),
      "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.grad.a1d`: four s32 results from {$l, $x} (f32 coordinate, $l
// presumably the array layer index), followed by {$gradx} and {$grady}.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_S32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$gradx, Float32Regs:$grady),
      "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.a1d`: four u32 results (held in Int32Regs) from {$l, $x}, where $x
// is an s32 coordinate and $l is presumably the array layer index.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_U32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}];",
      []>;
|
|
|
|
// PTX `tex.a1d`: four u32 results (held in Int32Regs) from {$l, $x}, where $x
// is an f32 coordinate and $l is presumably the array layer index.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_U32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
      "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}];",
      []>;
|
|
|
|
// PTX `tex.level.a1d`: four u32 results (held in Int32Regs) from {$l, $x}
// (f32 coordinate, $l presumably the array layer index), plus a $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_1D_ARRAY_U32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$lod),
      "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}], $lod;",
      []>;
|
|
|
|
// PTX `tex.grad.a1d`: four u32 results (held in Int32Regs) from {$l, $x}
// (f32 coordinate, $l presumably the array layer index), then {$gradx} and
// {$grady}. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_1D_ARRAY_U32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$gradx, Float32Regs:$grady),
      "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
|
2014-04-09 23:39:15 +08:00
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.2d`: four f32 results ($r, $g, $b, $a) from s32 coordinates
// {$x, $y}. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_2D_F32_S32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}];",
      []>;
|
|
|
|
// PTX `tex.2d`: four f32 results ($r, $g, $b, $a) from f32 coordinates
// {$x, $y}. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_2D_F32_F32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
      "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}];",
      []>;
|
|
|
|
// PTX `tex.level.2d`: four f32 results from f32 coordinates {$x, $y}, with an
// extra $lod operand printed after the address. $t/$s are presumably the
// texture and sampler handles. Empty pattern list.
def TEX_2D_F32_F32_LEVEL
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$lod),
      "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}], $lod;",
      []>;
|
|
|
|
// PTX `tex.grad.2d`: four f32 results from f32 coordinates {$x, $y}, followed
// by the two-element lists {$gradx0, $gradx1} and {$grady0, $grady1}.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_2D_F32_F32_GRAD
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$gradx0, Float32Regs:$gradx1,
           Float32Regs:$grady0, Float32Regs:$grady1),
      "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
      "\\{$grady0, $grady1\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.2d`: four s32 results ($r, $g, $b, $a) from s32 coordinates
// {$x, $y}. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_2D_S32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.2d`: four s32 results ($r, $g, $b, $a) from f32 coordinates
// {$x, $y}. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_2D_S32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
      "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.2d`: four s32 results from f32 coordinates {$x, $y}, with an
// extra $lod operand printed after the address. $t/$s are presumably the
// texture and sampler handles. Empty pattern list.
def TEX_2D_S32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$lod),
      "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.grad.2d`: four s32 results from f32 coordinates {$x, $y}, followed
// by the two-element lists {$gradx0, $gradx1} and {$grady0, $grady1}.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_2D_S32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$gradx0, Float32Regs:$gradx1,
           Float32Regs:$grady0, Float32Regs:$grady1),
      "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
      "\\{$grady0, $grady1\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.2d`: four u32 results (held in Int32Regs) from s32 coordinates
// {$x, $y}. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_2D_U32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}];",
      []>;
|
|
|
|
// PTX `tex.2d`: four u32 results (held in Int32Regs) from f32 coordinates
// {$x, $y}. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_2D_U32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
      "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}];",
      []>;
|
|
|
|
// PTX `tex.level.2d`: four u32 results (held in Int32Regs) from f32
// coordinates {$x, $y}, with an extra $lod operand after the address.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_2D_U32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$lod),
      "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}], $lod;",
      []>;
|
|
|
|
// PTX `tex.grad.2d`: four u32 results (held in Int32Regs) from f32
// coordinates {$x, $y}, followed by {$gradx0, $gradx1} and
// {$grady0, $grady1}. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_2D_U32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$gradx0, Float32Regs:$gradx1,
           Float32Regs:$grady0, Float32Regs:$grady1),
      "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
      "\\{$grady0, $grady1\\};",
      []>;
|
2014-04-09 23:39:15 +08:00
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.a2d`: four f32 results from {$l, $x, $y, $y} with s32 coordinates;
// $l is presumably the array layer index. $y is printed twice so the brace
// list has four entries (presumably padding). $t/$s are presumably the
// texture and sampler handles. Empty pattern list.
def TEX_2D_ARRAY_F32_S32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$y),
      "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}];",
      []>;
|
|
|
|
// PTX `tex.a2d`: four f32 results from {$l, $x, $y, $y} with f32 coordinates;
// $l is presumably the array layer index. $y is printed twice so the brace
// list has four entries (presumably padding). $t/$s are presumably the
// texture and sampler handles. Empty pattern list.
def TEX_2D_ARRAY_F32_F32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y),
      "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}];",
      []>;
|
|
|
|
// PTX `tex.level.a2d`: four f32 results from {$l, $x, $y, $y} (f32
// coordinates, $l presumably the array layer index, $y repeated to fill four
// entries), plus a $lod operand. $t/$s are presumably the texture and sampler
// handles. Empty pattern list.
def TEX_2D_ARRAY_F32_F32_LEVEL
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y, Float32Regs:$lod),
      "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
      []>;
|
|
|
|
// PTX `tex.grad.a2d`: four f32 results from {$l, $x, $y, $y} (f32
// coordinates, $l presumably the array layer index, $y repeated to fill four
// entries), followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_2D_ARRAY_F32_F32_GRAD
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y,
           Float32Regs:$gradx0, Float32Regs:$gradx1,
           Float32Regs:$grady0, Float32Regs:$grady1),
      "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
      "\\{$grady0, $grady1\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.a2d`: four s32 results from {$l, $x, $y, $y} with s32 coordinates;
// $l is presumably the array layer index. $y is printed twice so the brace
// list has four entries (presumably padding). $t/$s are presumably the
// texture and sampler handles. Empty pattern list.
def TEX_2D_ARRAY_S32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$y),
      "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.a2d`: four s32 results from {$l, $x, $y, $y} with f32 coordinates;
// $l is presumably the array layer index. $y is printed twice so the brace
// list has four entries (presumably padding). $t/$s are presumably the
// texture and sampler handles. Empty pattern list.
def TEX_2D_ARRAY_S32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y),
      "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.a2d`: four s32 results from {$l, $x, $y, $y} (f32
// coordinates, $l presumably the array layer index, $y repeated to fill four
// entries), plus a $lod operand. $t/$s are presumably the texture and sampler
// handles. Empty pattern list.
def TEX_2D_ARRAY_S32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y, Float32Regs:$lod),
      "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.grad.a2d`: four s32 results from {$l, $x, $y, $y} (f32
// coordinates, $l presumably the array layer index, $y repeated to fill four
// entries), followed by {$gradx0, $gradx1} and {$grady0, $grady1}.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_2D_ARRAY_S32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y,
           Float32Regs:$gradx0, Float32Regs:$gradx1,
           Float32Regs:$grady0, Float32Regs:$grady1),
      "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
      "\\{$grady0, $grady1\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.a2d`: four u32 results (held in Int32Regs) from {$l, $x, $y, $y}
// with s32 coordinates; $l is presumably the array layer index. $y is printed
// twice so the brace list has four entries (presumably padding).
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_2D_ARRAY_U32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$y),
      "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}];",
      []>;
|
|
|
|
// PTX `tex.a2d`: four u32 results (held in Int32Regs) from {$l, $x, $y, $y}
// with f32 coordinates; $l is presumably the array layer index. $y is printed
// twice so the brace list has four entries (presumably padding).
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_2D_ARRAY_U32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y),
      "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}];",
      []>;
|
|
|
|
// PTX `tex.level.a2d`: four u32 results (held in Int32Regs) from
// {$l, $x, $y, $y} (f32 coordinates, $l presumably the array layer index,
// $y repeated to fill four entries), plus a $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_2D_ARRAY_U32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y, Float32Regs:$lod),
      "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
      []>;
|
|
|
|
// PTX `tex.grad.a2d`: four u32 results (held in Int32Regs) from
// {$l, $x, $y, $y} (f32 coordinates, $l presumably the array layer index,
// $y repeated to fill four entries), followed by {$gradx0, $gradx1} and
// {$grady0, $grady1}. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_2D_ARRAY_U32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y,
           Float32Regs:$gradx0, Float32Regs:$gradx1,
           Float32Regs:$grady0, Float32Regs:$grady1),
      "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
      "\\{$grady0, $grady1\\};",
      []>;
|
2014-04-09 23:39:15 +08:00
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.3d`: four f32 results from s32 coordinates {$x, $y, $z, $z}.
// $z is printed twice so the brace list has four entries (presumably
// padding). $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_3D_F32_S32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$z),
      "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
|
|
|
|
// PTX `tex.3d`: four f32 results from f32 coordinates {$x, $y, $z, $z}.
// $z is printed twice so the brace list has four entries (presumably
// padding). $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_3D_F32_F32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z),
      "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
|
|
|
|
// PTX `tex.level.3d`: four f32 results from f32 coordinates {$x, $y, $z, $z}
// ($z repeated to fill four entries), plus a $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_3D_F32_F32_LEVEL
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z, Float32Regs:$lod),
      "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
|
|
|
|
// PTX `tex.grad.3d`: four f32 results from f32 coordinates {$x, $y, $z, $z},
// followed by two gradient lists. In each four-entry list the third element
// ($z, $gradx2, $grady2) is printed twice, presumably as padding.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_3D_F32_F32_GRAD
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z,
           Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
           Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
      "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}], "
      "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
      "\\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.3d`: four s32 results from s32 coordinates {$x, $y, $z, $z}.
// $z is printed twice so the brace list has four entries (presumably
// padding). $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_3D_S32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$z),
      "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.3d`: four s32 results from f32 coordinates {$x, $y, $z, $z}.
// $z is printed twice so the brace list has four entries (presumably
// padding). $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_3D_S32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z),
      "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.3d`: four s32 results from f32 coordinates {$x, $y, $z, $z}
// ($z repeated to fill four entries), plus a $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_3D_S32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z, Float32Regs:$lod),
      "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.grad.3d`: four s32 results from f32 coordinates {$x, $y, $z, $z},
// followed by two gradient lists. In each four-entry list the third element
// ($z, $gradx2, $grady2) is printed twice, presumably as padding.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_3D_S32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z,
           Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
           Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
      "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}], "
      "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
      "\\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.3d`: four u32 results (held in Int32Regs) from s32 coordinates
// {$x, $y, $z, $z}. $z is printed twice so the brace list has four entries
// (presumably padding). $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_3D_U32_S32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$z),
      "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.3d`: four u32 results (held in Int32Regs) from f32 coordinates
// {$x, $y, $z, $z}. $z is printed twice so the brace list has four entries
// (presumably padding). $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_3D_U32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z),
      "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.3d`: four u32 results (held in Int32Regs) from f32
// coordinates {$x, $y, $z, $z} ($z repeated to fill four entries), plus a
// $lod operand. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_3D_U32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z, Float32Regs:$lod),
      "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.grad.3d`: four u32 results (held in Int32Regs) from f32
// coordinates {$x, $y, $z, $z}, followed by two gradient lists. In each
// four-entry list the third element ($z, $gradx2, $grady2) is printed twice,
// presumably as padding. $t/$s are presumably the texture and sampler
// handles. Empty pattern list.
def TEX_3D_U32_F32_GRAD
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z,
           Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
           Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
      "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}], "
      "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
      "\\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
|
|
|
|
// PTX `tex.cube`: four f32 results from f32 coordinates {$x, $y, $z, $z}.
// $z is printed twice so the brace list has four entries (presumably
// padding). $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_CUBE_F32_F32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z),
      "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.cube`: four f32 results from f32 coordinates
// {$x, $y, $z, $z} ($z repeated to fill four entries), plus a $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_CUBE_F32_F32_LEVEL
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z, Float32Regs:$lod),
      "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.cube`: four s32 results from f32 coordinates {$x, $y, $z, $z}.
// $z is printed twice so the brace list has four entries (presumably
// padding). $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_CUBE_S32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z),
      "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.cube`: four s32 results from f32 coordinates
// {$x, $y, $z, $z} ($z repeated to fill four entries), plus a $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_CUBE_S32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z, Float32Regs:$lod),
      "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.cube`: four u32 results (held in Int32Regs) from f32 coordinates
// {$x, $y, $z, $z}. $z is printed twice so the brace list has four entries
// (presumably padding). $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_CUBE_U32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z),
      "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
|
|
|
|
// PTX `tex.level.cube`: four u32 results (held in Int32Regs) from f32
// coordinates {$x, $y, $z, $z} ($z repeated to fill four entries), plus a
// $lod operand. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_CUBE_U32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
           Float32Regs:$z, Float32Regs:$lod),
      "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.acube`: four f32 results from {$l, $x, $y, $z} with f32
// coordinates; $l is presumably the array layer index.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_CUBE_ARRAY_F32_F32
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y, Float32Regs:$z),
      "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $z\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.acube`: four f32 results from {$l, $x, $y, $z} (f32
// coordinates, $l presumably the array layer index), plus a $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_CUBE_ARRAY_F32_F32_LEVEL
  : NVPTXInst<
      (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
      "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.acube`: four s32 results from {$l, $x, $y, $z} with f32
// coordinates; $l is presumably the array layer index.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_CUBE_ARRAY_S32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y, Float32Regs:$z),
      "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $z\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.acube`: four s32 results from {$l, $x, $y, $z} (f32
// coordinates, $l presumably the array layer index), plus a $lod operand.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_CUBE_ARRAY_S32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
      "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.acube`: four u32 results (held in Int32Regs) from {$l, $x, $y, $z}
// with f32 coordinates; $l is presumably the array layer index.
// $t/$s are presumably the texture and sampler handles. Empty pattern list.
def TEX_CUBE_ARRAY_U32_F32
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y, Float32Regs:$z),
      "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $z\\}];",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// PTX `tex.level.acube`: four u32 results (held in Int32Regs) from
// {$l, $x, $y, $z} (f32 coordinates, $l presumably the array layer index),
// plus a $lod operand. $t/$s are presumably the texture and sampler handles.
// Empty pattern list.
def TEX_CUBE_ARRAY_U32_F32_LEVEL
  : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
           Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
      "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
      "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
      []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
|
|
|
|
// tld4.r.2d.v4.f32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Float32 values $v0..$v3. No pattern attached ([]).
def TLD4_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// tld4.g.2d.v4.f32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Float32 values $v0..$v3. No pattern attached ([]).
def TLD4_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// tld4.b.2d.v4.f32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Float32 values $v0..$v3. No pattern attached ([]).
def TLD4_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// tld4.a.2d.v4.f32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Float32 values $v0..$v3. No pattern attached ([]).
def TLD4_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// tld4.r.2d.v4.s32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Int32 values $v0..$v3. No pattern attached ([]).
def TLD4_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// tld4.g.2d.v4.s32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Int32 values $v0..$v3. No pattern attached ([]).
def TLD4_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// tld4.b.2d.v4.s32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Int32 values $v0..$v3. No pattern attached ([]).
def TLD4_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// tld4.a.2d.v4.s32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Int32 values $v0..$v3. No pattern attached ([]).
def TLD4_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// tld4.r.2d.v4.u32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Int32 values $v0..$v3. No pattern attached ([]).
def TLD4_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// tld4.g.2d.v4.u32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Int32 values $v0..$v3. No pattern attached ([]).
def TLD4_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// tld4.b.2d.v4.u32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Int32 values $v0..$v3. No pattern attached ([]).
def TLD4_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// tld4.a.2d.v4.u32.f32 with $t and $s operands and f32 ($x, $y); writes four
// Int32 values $v0..$v3. No pattern attached ([]).
def TLD4_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, ",
               "[$t, $s, \\{$x, $y\\}];"),
    []>;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// texmode_unified
|
|
|
|
let IsTex = 1, IsTexModeUnified = 1 in {
|
|
|
|
// Texture fetch instructions using handles
|
|
|
|
// Unified-mode tex.1d.v4.f32.s32: single $t operand, Int32 coordinate $x,
// four Float32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    !strconcat("tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.1d.v4.f32.f32: single $t operand, Float32 coordinate $x,
// four Float32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    !strconcat("tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.1d.v4.f32.f32: single $t operand, Float32 $x and a
// trailing Float32 $lod; four Float32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    !strconcat("tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.1d.v4.f32.f32: single $t operand, Float32 $x, then
// one-element $gradx and $grady lists. No pattern attached ([]).
def TEX_UNIFIED_1D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    !strconcat("tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};"),
    []>;
|
|
|
|
// Unified-mode tex.1d.v4.s32.s32: single $t operand, Int32 coordinate $x,
// four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    !strconcat("tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.1d.v4.s32.f32: single $t operand, Float32 coordinate $x,
// four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    !strconcat("tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.1d.v4.s32.f32: single $t operand, Float32 $x and a
// trailing Float32 $lod; four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$lod),
    !strconcat("tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.1d.v4.s32.f32: single $t operand, Float32 $x, then
// one-element $gradx and $grady lists. No pattern attached ([]).
def TEX_UNIFIED_1D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    !strconcat("tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};"),
    []>;
|
|
|
|
// Unified-mode tex.1d.v4.u32.s32: single $t operand, Int32 coordinate $x,
// four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    !strconcat("tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.1d.v4.u32.f32: single $t operand, Float32 coordinate $x,
// four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    !strconcat("tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.1d.v4.u32.f32: single $t operand, Float32 $x and a
// trailing Float32 $lod; four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$lod),
    !strconcat("tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.1d.v4.u32.f32: single $t operand, Float32 $x, then
// one-element $gradx and $grady lists. No pattern attached ([]).
def TEX_UNIFIED_1D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    !strconcat("tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};"),
    []>;
|
|
|
|
|
|
|
|
// Unified-mode tex.a1d.v4.f32.s32: single $t operand, Int32 $l then Int32 $x
// in the coordinate list; four Float32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    !strconcat("tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.a1d.v4.f32.f32: single $t operand, Int32 $l then Float32
// $x in the coordinate list; four Float32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    !strconcat("tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.a1d.v4.f32.f32: single $t operand, ($l, $x)
// coordinate list and trailing Float32 $lod. No pattern attached ([]).
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    !strconcat("tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.a1d.v4.f32.f32: single $t operand, ($l, $x)
// coordinate list, then one-element $gradx and $grady lists. No pattern ([]).
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    !strconcat("tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};"),
    []>;
|
|
|
|
// Unified-mode tex.a1d.v4.s32.s32: single $t operand, Int32 $l then Int32 $x
// in the coordinate list; four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    !strconcat("tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.a1d.v4.s32.f32: single $t operand, Int32 $l then Float32
// $x in the coordinate list; four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    !strconcat("tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.a1d.v4.s32.f32: single $t operand, ($l, $x)
// coordinate list and trailing Float32 $lod. No pattern attached ([]).
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    !strconcat("tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.a1d.v4.s32.f32: single $t operand, ($l, $x)
// coordinate list, then one-element $gradx and $grady lists. No pattern ([]).
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    !strconcat("tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};"),
    []>;
|
|
|
|
// Unified-mode tex.a1d.v4.u32.s32: single $t operand, Int32 $l then Int32 $x
// in the coordinate list; four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    !strconcat("tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.a1d.v4.u32.f32: single $t operand, Int32 $l then Float32
// $x in the coordinate list; four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_1D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    !strconcat("tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.a1d.v4.u32.f32: single $t operand, ($l, $x)
// coordinate list and trailing Float32 $lod. No pattern attached ([]).
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    !strconcat("tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.a1d.v4.u32.f32: single $t operand, ($l, $x)
// coordinate list, then one-element $gradx and $grady lists. No pattern ([]).
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    !strconcat("tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};"),
    []>;
|
|
|
|
|
|
|
|
// Unified-mode tex.2d.v4.f32.s32: single $t operand, Int32 ($x, $y), four
// Float32 results. No pattern attached ([]).
def TEX_UNIFIED_2D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    !strconcat("tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.2d.v4.f32.f32: single $t operand, Float32 ($x, $y), four
// Float32 results. No pattern attached ([]).
def TEX_UNIFIED_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.2d.v4.f32.f32: single $t operand, Float32 ($x, $y)
// and trailing Float32 $lod; four Float32 results. No pattern attached ([]).
def TEX_UNIFIED_2D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    !strconcat("tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.2d.v4.f32.f32: single $t operand, Float32 ($x, $y),
// then two-element gradx and grady lists. No pattern attached ([]).
def TEX_UNIFIED_2D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    !strconcat("tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, ",
               "\\{$grady0, $grady1\\};"),
    []>;
|
|
|
|
// Unified-mode tex.2d.v4.s32.s32: single $t operand, Int32 ($x, $y), four
// Int32 results. No pattern attached ([]).
def TEX_UNIFIED_2D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    !strconcat("tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.2d.v4.s32.f32: single $t operand, Float32 ($x, $y), four
// Int32 results. No pattern attached ([]).
def TEX_UNIFIED_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.2d.v4.s32.f32: single $t operand, Float32 ($x, $y)
// and trailing Float32 $lod; four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_2D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    !strconcat("tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.2d.v4.s32.f32: single $t operand, Float32 ($x, $y),
// then two-element gradx and grady lists. No pattern attached ([]).
def TEX_UNIFIED_2D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    !strconcat("tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, ",
               "\\{$grady0, $grady1\\};"),
    []>;
|
|
|
|
// Unified-mode tex.2d.v4.u32.s32: single $t operand, Int32 ($x, $y), four
// Int32 results. No pattern attached ([]).
def TEX_UNIFIED_2D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    !strconcat("tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.2d.v4.u32.f32: single $t operand, Float32 ($x, $y), four
// Int32 results. No pattern attached ([]).
def TEX_UNIFIED_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    !strconcat("tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.2d.v4.u32.f32: single $t operand, Float32 ($x, $y)
// and trailing Float32 $lod; four Int32 results. No pattern attached ([]).
def TEX_UNIFIED_2D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    !strconcat("tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.2d.v4.u32.f32: single $t operand, Float32 ($x, $y),
// then two-element gradx and grady lists. No pattern attached ([]).
def TEX_UNIFIED_2D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    !strconcat("tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, ",
               "\\{$grady0, $grady1\\};"),
    []>;
|
|
|
|
|
|
|
|
// Unified-mode tex.a2d.v4.f32.s32: single $t operand; the asm coordinate list
// is four entries long ($l, $x, $y, $y), with $y listed twice. No pattern ([]).
def TEX_UNIFIED_2D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
         Int32Regs:$y),
    !strconcat("tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.a2d.v4.f32.f32: single $t operand; the asm coordinate list
// is four entries long ($l, $x, $y, $y), with $y listed twice. No pattern ([]).
def TEX_UNIFIED_2D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    !strconcat("tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.a2d.v4.f32.f32: single $t operand, four-entry
// coordinate list ($l, $x, $y, $y) and trailing $lod. No pattern ([]).
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$lod),
    !strconcat("tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.a2d.v4.f32.f32: single $t operand, four-entry
// coordinate list ($l, $x, $y, $y), then two-element gradx and grady lists.
// No pattern attached ([]).
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    !strconcat("tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, ",
               "\\{$grady0, $grady1\\};"),
    []>;
|
|
|
|
// Unified-mode tex.a2d.v4.s32.s32: single $t operand; the asm coordinate list
// is four entries long ($l, $x, $y, $y), with $y listed twice. No pattern ([]).
def TEX_UNIFIED_2D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
         Int32Regs:$y),
    !strconcat("tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.a2d.v4.s32.f32: single $t operand; the asm coordinate list
// is four entries long ($l, $x, $y, $y), with $y listed twice. No pattern ([]).
def TEX_UNIFIED_2D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    !strconcat("tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.a2d.v4.s32.f32: single $t operand, four-entry
// coordinate list ($l, $x, $y, $y) and trailing $lod. No pattern ([]).
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$lod),
    !strconcat("tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.a2d.v4.s32.f32: single $t operand, four-entry
// coordinate list ($l, $x, $y, $y), then two-element gradx and grady lists.
// No pattern attached ([]).
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    !strconcat("tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, ",
               "\\{$grady0, $grady1\\};"),
    []>;
|
|
|
|
// Unified-mode tex.a2d.v4.u32.s32: single $t operand; the asm coordinate list
// is four entries long ($l, $x, $y, $y), with $y listed twice. No pattern ([]).
def TEX_UNIFIED_2D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
         Int32Regs:$y),
    !strconcat("tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.a2d.v4.u32.f32: single $t operand; the asm coordinate list
// is four entries long ($l, $x, $y, $y), with $y listed twice. No pattern ([]).
def TEX_UNIFIED_2D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    !strconcat("tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.a2d.v4.u32.f32: single $t operand, four-entry
// coordinate list ($l, $x, $y, $y) and trailing $lod. No pattern ([]).
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$lod),
    !strconcat("tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.a2d.v4.u32.f32: single $t operand, four-entry
// coordinate list ($l, $x, $y, $y), then two-element gradx and grady lists.
// No pattern attached ([]).
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    !strconcat("tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, ",
               "\\{$grady0, $grady1\\};"),
    []>;
|
|
|
|
|
|
|
|
// Unified-mode tex.3d.v4.f32.s32: single $t operand; the asm coordinate list
// is four entries long ($x, $y, $z, $z), with $z listed twice. No pattern ([]).
def TEX_UNIFIED_3D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    !strconcat("tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.3d.v4.f32.f32: single $t operand; the asm coordinate list
// is four entries long ($x, $y, $z, $z), with $z listed twice. No pattern ([]).
def TEX_UNIFIED_3D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    !strconcat("tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.3d.v4.f32.f32: single $t operand, four-entry
// coordinate list ($x, $y, $z, $z) and trailing $lod. No pattern ([]).
def TEX_UNIFIED_3D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    !strconcat("tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.3d.v4.f32.f32: single $t operand. The coordinate and
// both gradient lists are four entries long in the asm string, each repeating
// its last operand ($z, $gradx2, $grady2). No pattern attached ([]).
def TEX_UNIFIED_3D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$gradx2, Float32Regs:$grady0,
         Float32Regs:$grady1, Float32Regs:$grady2),
    !strconcat("tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}], ",
               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, ",
               "\\{$grady0, $grady1, $grady2, $grady2\\};"),
    []>;
|
|
|
|
// Unified-mode tex.3d.v4.s32.s32: single $t operand; the asm coordinate list
// is four entries long ($x, $y, $z, $z), with $z listed twice. No pattern ([]).
def TEX_UNIFIED_3D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    !strconcat("tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.3d.v4.s32.f32: single $t operand; the asm coordinate list
// is four entries long ($x, $y, $z, $z), with $z listed twice. No pattern ([]).
def TEX_UNIFIED_3D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    !strconcat("tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.3d.v4.s32.f32: single $t operand, four-entry
// coordinate list ($x, $y, $z, $z) and trailing $lod. No pattern ([]).
def TEX_UNIFIED_3D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    !strconcat("tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.3d.v4.s32.f32: single $t operand. The coordinate and
// both gradient lists are four entries long in the asm string, each repeating
// its last operand ($z, $gradx2, $grady2). No pattern attached ([]).
def TEX_UNIFIED_3D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$gradx2, Float32Regs:$grady0,
         Float32Regs:$grady1, Float32Regs:$grady2),
    !strconcat("tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}], ",
               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, ",
               "\\{$grady0, $grady1, $grady2, $grady2\\};"),
    []>;
|
|
|
|
// Unified-mode tex.3d.v4.u32.s32: single $t operand; the asm coordinate list
// is four entries long ($x, $y, $z, $z), with $z listed twice. No pattern ([]).
def TEX_UNIFIED_3D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    !strconcat("tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.3d.v4.u32.f32: single $t operand; the asm coordinate list
// is four entries long ($x, $y, $z, $z), with $z listed twice. No pattern ([]).
def TEX_UNIFIED_3D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    !strconcat("tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.3d.v4.u32.f32: single $t operand, four-entry
// coordinate list ($x, $y, $z, $z) and trailing $lod. No pattern ([]).
def TEX_UNIFIED_3D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    !strconcat("tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.grad.3d.v4.u32.f32: single $t operand. The coordinate and
// both gradient lists are four entries long in the asm string, each repeating
// its last operand ($z, $gradx2, $grady2). No pattern attached ([]).
def TEX_UNIFIED_3D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$gradx2, Float32Regs:$grady0,
         Float32Regs:$grady1, Float32Regs:$grady2),
    !strconcat("tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}], ",
               "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, ",
               "\\{$grady0, $grady1, $grady2, $grady2\\};"),
    []>;
|
|
|
|
|
|
|
|
// Unified-mode tex.cube.v4.f32.f32: single $t operand; the asm coordinate
// list is four entries long ($x, $y, $z, $z), with $z listed twice.
// No pattern attached ([]).
def TEX_UNIFIED_CUBE_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    !strconcat("tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.cube.v4.f32.f32: single $t operand, four-entry
// coordinate list ($x, $y, $z, $z) and trailing $lod. No pattern ([]).
def TEX_UNIFIED_CUBE_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    !strconcat("tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.cube.v4.s32.f32: single $t operand; the asm coordinate
// list is four entries long ($x, $y, $z, $z), with $z listed twice.
// No pattern attached ([]).
def TEX_UNIFIED_CUBE_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    !strconcat("tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.cube.v4.s32.f32: single $t operand, four-entry
// coordinate list ($x, $y, $z, $z) and trailing $lod. No pattern ([]).
def TEX_UNIFIED_CUBE_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    !strconcat("tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}], $lod;"),
    []>;
|
|
|
|
// Unified-mode tex.cube.v4.u32.f32: single $t operand; the asm coordinate
// list is four entries long ($x, $y, $z, $z), with $z listed twice.
// No pattern attached ([]).
def TEX_UNIFIED_CUBE_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    !strconcat("tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.cube.v4.u32.f32: single $t operand, four-entry
// coordinate list ($x, $y, $z, $z) and trailing $lod. No pattern ([]).
def TEX_UNIFIED_CUBE_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    !strconcat("tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$x, $y, $z, $z\\}], $lod;"),
    []>;
|
|
|
|
|
|
|
|
// Unified-mode tex.acube.v4.f32.f32: single $t operand, Int32 $l followed by
// f32 ($x, $y, $z); four Float32 results. No pattern attached ([]).
def TEX_UNIFIED_CUBE_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    !strconcat("tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $z\\}];"),
    []>;
|
|
|
|
// Unified-mode tex.level.acube.v4.f32.f32: single $t operand, ($l, $x, $y,
// $z) coordinate list and trailing Float32 $lod. No pattern attached ([]).
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    !strconcat("tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, ",
               "[$t, \\{$l, $x, $y, $z\\}], $lod;"),
    []>;
|
|
|
|
// Emits PTX `tex.acube.v4.s32.f32`: four 32-bit integer results ($r, $g, $b,
// $a) fetched through the 64-bit operand $t. The coordinate vector is
// {$l, $x, $y, $z}: a 32-bit integer $l first (the array index in the PTX
// `acube` form), followed by three float coordinates.
// The pattern list is empty, so no DAG pattern is attached to this record.
def TEX_UNIFIED_CUBE_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}];",
              []>;
|
|
|
|
// Emits PTX `tex.level.acube.v4.s32.f32`: four 32-bit integer results fetched
// through the 64-bit operand $t with coordinate vector {$l, $x, $y, $z}
// (32-bit integer $l first, then three floats) and a float $lod operand
// printed after the address.
// The pattern list is empty, so no DAG pattern is attached to this record.
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
|
|
|
|
// Emits PTX `tex.acube.v4.u32.f32`: four 32-bit integer results ($r, $g, $b,
// $a) fetched through the 64-bit operand $t. The coordinate vector is
// {$l, $x, $y, $z}: a 32-bit integer $l first (the array index in the PTX
// `acube` form), followed by three float coordinates.
// The pattern list is empty, so no DAG pattern is attached to this record.
def TEX_UNIFIED_CUBE_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}];",
              []>;
|
|
|
|
// Emits PTX `tex.level.acube.v4.u32.f32`: four 32-bit integer results fetched
// through the 64-bit operand $t with coordinate vector {$l, $x, $y, $z}
// (32-bit integer $l first, then three floats) and a float $lod operand
// printed after the address.
// The pattern list is empty, so no DAG pattern is attached to this record.
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
|
|
|
|
|
|
|
|
// Emits PTX `tld4.r.2d.v4.f32.f32`: four float results ($v0..$v3) for the
// `r` component selector, fetched through the 64-bit operand $t at the 2D
// float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_R_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.g.2d.v4.f32.f32`: four float results ($v0..$v3) for the
// `g` component selector, fetched through the 64-bit operand $t at the 2D
// float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_G_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.b.2d.v4.f32.f32`: four float results ($v0..$v3) for the
// `b` component selector, fetched through the 64-bit operand $t at the 2D
// float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_B_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.a.2d.v4.f32.f32`: four float results ($v0..$v3) for the
// `a` component selector, fetched through the 64-bit operand $t at the 2D
// float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_A_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.r.2d.v4.s32.f32`: four 32-bit integer results ($v0..$v3)
// for the `r` component selector, fetched through the 64-bit operand $t at
// the 2D float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_R_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.g.2d.v4.s32.f32`: four 32-bit integer results ($v0..$v3)
// for the `g` component selector, fetched through the 64-bit operand $t at
// the 2D float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_G_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.b.2d.v4.s32.f32`: four 32-bit integer results ($v0..$v3)
// for the `b` component selector, fetched through the 64-bit operand $t at
// the 2D float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_B_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.a.2d.v4.s32.f32`: four 32-bit integer results ($v0..$v3)
// for the `a` component selector, fetched through the 64-bit operand $t at
// the 2D float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_A_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.r.2d.v4.u32.f32`: four 32-bit integer results ($v0..$v3)
// for the `r` component selector, fetched through the 64-bit operand $t at
// the 2D float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_R_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.g.2d.v4.u32.f32`: four 32-bit integer results ($v0..$v3)
// for the `g` component selector, fetched through the 64-bit operand $t at
// the 2D float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_G_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.b.2d.v4.u32.f32`: four 32-bit integer results ($v0..$v3)
// for the `b` component selector, fetched through the 64-bit operand $t at
// the 2D float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_B_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
// Emits PTX `tld4.a.2d.v4.u32.f32`: four 32-bit integer results ($v0..$v3)
// for the `a` component selector, fetched through the 64-bit operand $t at
// the 2D float coordinates ($x, $y).
// The pattern list is empty, so no DAG pattern is attached to this record.
def TLD4_UNIFIED_A_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
                    Int32Regs:$v2, Int32Regs:$v3),
              (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
              "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, \\{$x, $y\\}];",
              []>;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//=== Surface load instructions
|
|
|
|
// .clamp variant: an out-of-bounds access loads from the nearest valid surface location.
|
|
|
|
// Single-result `suld.b ... .clamp` surface loads.
//
// Every record in this block has exactly one output register ($r), takes the
// 64-bit operand $s followed by 32-bit integer coordinates, and carries an
// empty pattern list (no DAG pattern is attached here). IsSuld is declared
// outside this chunk; in the visible part of the file the value 1 is applied
// to the one-result loads. 8-bit (`b8`) loads deliver their result in a
// 16-bit register class.
//
// Coordinate vectors for the a2d and 3d geometries have four elements, with
// the last coordinate repeated in the fourth slot; the PTX ISA describes that
// fourth element as ignored.
let IsSuld = 1 in {
  // 1D: [$s, {$x}]
  def SULD_1D_I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
                []>;

  // 1D array: [$s, {$l, $x}]
  def SULD_1D_ARRAY_I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;

  // 2D: [$s, {$x, $y}]
  def SULD_2D_I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;

  // 2D array: [$s, {$l, $x, $y, $y}] (fourth slot is padding)
  def SULD_2D_ARRAY_I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;

  // 3D: [$s, {$x, $y, $z, $z}] (fourth slot is padding)
  def SULD_3D_I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
}
|
|
|
|
|
|
|
|
// Two-result (`.v2`) `suld.b ... .clamp` surface loads.
//
// Every record in this block has two output registers ($r, $g), takes the
// 64-bit operand $s followed by 32-bit integer coordinates, and carries an
// empty pattern list (no DAG pattern is attached here). IsSuld is declared
// outside this chunk; in the visible part of the file the value 2 is applied
// to the two-result loads. 8-bit (`b8`) loads deliver their results in a
// 16-bit register class.
//
// Coordinate vectors for the a2d and 3d geometries have four elements, with
// the last coordinate repeated in the fourth slot; the PTX ISA describes that
// fourth element as ignored.
let IsSuld = 2 in {
  // 1D: [$s, {$x}]
  def SULD_1D_V2I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_V2I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_V2I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_V2I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;

  // 1D array: [$s, {$l, $x}]
  def SULD_1D_ARRAY_V2I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_V2I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_V2I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_V2I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;

  // 2D: [$s, {$x, $y}]
  def SULD_2D_V2I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_V2I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_V2I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_V2I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                []>;

  // 2D array: [$s, {$l, $x, $y, $y}] (fourth slot is padding)
  def SULD_2D_ARRAY_V2I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_V2I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_V2I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_V2I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;

  // 3D: [$s, {$x, $y, $z, $z}] (fourth slot is padding)
  def SULD_3D_V2I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_V2I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_V2I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_V2I64_CLAMP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
}
|
|
|
|
|
|
|
|
// Four-result (`.v4`) `suld.b ... .clamp` surface loads.
//
// Every record in this block has four output registers ($r, $g, $b, $a),
// takes the 64-bit operand $s followed by 32-bit integer coordinates, and
// carries an empty pattern list (no DAG pattern is attached here). IsSuld is
// declared outside this chunk; in the visible part of the file the value 3 is
// applied to the four-result loads. Only b8/b16/b32 element sizes are defined
// here (there is no 64-bit v4 form in this block); b8 results use a 16-bit
// register class.
//
// Coordinate vectors for the a2d and 3d geometries have four elements, with
// the last coordinate repeated in the fourth slot; the PTX ISA describes that
// fourth element as ignored.
let IsSuld = 3 in {
  // 1D: [$s, {$x}]
  def SULD_1D_V4I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_V4I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_V4I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
                []>;

  // 1D array: [$s, {$l, $x}]
  def SULD_1D_ARRAY_V4I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
                "[$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_V4I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
                "[$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_V4I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
                "[$s, \\{$l, $x\\}];",
                []>;

  // 2D: [$s, {$x, $y}]
  def SULD_2D_V4I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_V4I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_V4I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
                []>;

  // 2D array: [$s, {$l, $x, $y, $y}] (fourth slot is padding)
  def SULD_2D_ARRAY_V4I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_V4I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_V4I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;

  // 3D: [$s, {$x, $y, $z, $z}] (fourth slot is padding)
  def SULD_3D_V4I8_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
                "[$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_V4I16_CLAMP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
                "[$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_V4I32_CLAMP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
                "[$s, \\{$x, $y, $z, $z\\}];",
                []>;
}
|
|
|
|
|
|
|
|
|
|
|
|
// .trap variant: an out-of-bounds access causes an execution trap.
|
|
|
|
// Single-result `suld.b ... .trap` surface loads.
//
// Every record in this block has exactly one output register ($r), takes the
// 64-bit operand $s followed by 32-bit integer coordinates, and carries an
// empty pattern list (no DAG pattern is attached here). IsSuld is declared
// outside this chunk; in the visible part of the file the value 1 is applied
// to the one-result loads. 8-bit (`b8`) loads deliver their result in a
// 16-bit register class.
//
// Coordinate vectors for the a2d and 3d geometries have four elements, with
// the last coordinate repeated in the fourth slot; the PTX ISA describes that
// fourth element as ignored.
let IsSuld = 1 in {
  // 1D: [$s, {$x}]
  def SULD_1D_I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
                []>;

  // 1D array: [$s, {$l, $x}]
  def SULD_1D_ARRAY_I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;

  // 2D: [$s, {$x, $y}]
  def SULD_2D_I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;

  // 2D array: [$s, {$l, $x, $y, $y}] (fourth slot is padding)
  def SULD_2D_ARRAY_I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;

  // 3D: [$s, {$x, $y, $z, $z}] (fourth slot is padding)
  def SULD_3D_I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
}
|
|
|
|
|
|
|
|
// Two-result (`.v2`) `suld.b ... .trap` surface loads.
//
// Every record in this block has two output registers ($r, $g), takes the
// 64-bit operand $s followed by 32-bit integer coordinates, and carries an
// empty pattern list (no DAG pattern is attached here). IsSuld is declared
// outside this chunk; in the visible part of the file the value 2 is applied
// to the two-result loads. 8-bit (`b8`) loads deliver their results in a
// 16-bit register class.
//
// Coordinate vectors for the a2d and 3d geometries have four elements, with
// the last coordinate repeated in the fourth slot; the PTX ISA describes that
// fourth element as ignored.
let IsSuld = 2 in {
  // 1D: [$s, {$x}]
  def SULD_1D_V2I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_V2I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_V2I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;
  def SULD_1D_V2I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x),
                "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;

  // 1D array: [$s, {$l, $x}]
  def SULD_1D_ARRAY_V2I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_V2I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_V2I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;
  def SULD_1D_ARRAY_V2I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
                "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;

  // 2D: [$s, {$x, $y}]
  def SULD_2D_V2I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_V2I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_V2I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                []>;
  def SULD_2D_V2I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
                "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                []>;

  // 2D array: [$s, {$l, $x, $y, $y}] (fourth slot is padding)
  def SULD_2D_ARRAY_V2I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_V2I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_V2I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;
  def SULD_2D_ARRAY_V2I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
                "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
                "[$s, \\{$l, $x, $y, $y\\}];",
                []>;

  // 3D: [$s, {$x, $y, $z, $z}] (fourth slot is padding)
  def SULD_3D_V2I8_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_V2I16_TRAP
    : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_V2I32_TRAP
    : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
  def SULD_3D_V2I64_TRAP
    : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
                (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
                "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
}
|
|
|
|
|
|
|
|
let IsSuld = 3 in {
|
|
|
|
def SULD_1D_V4I8_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$x),
|
|
|
|
"suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
|
|
|
|
[]>;
|
|
|
|
def SULD_1D_V4I16_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$x),
|
|
|
|
"suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
|
|
|
|
[]>;
|
|
|
|
def SULD_1D_V4I32_TRAP
|
|
|
|
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$x),
|
|
|
|
"suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
|
|
|
|
[]>;
|
|
|
|
|
|
|
|
def SULD_1D_ARRAY_V4I8_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
|
|
|
|
"suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
|
|
|
|
"[$s, \\{$l, $x\\}];",
|
|
|
|
[]>;
|
|
|
|
def SULD_1D_ARRAY_V4I16_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
|
|
|
|
"suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
|
|
|
|
"[$s, \\{$l, $x\\}];",
|
|
|
|
[]>;
|
|
|
|
def SULD_1D_ARRAY_V4I32_TRAP
|
|
|
|
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
|
|
|
|
"suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
|
|
|
|
"[$s, \\{$l, $x\\}];",
|
|
|
|
[]>;
|
|
|
|
|
|
|
|
def SULD_2D_V4I8_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
|
|
|
|
"suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
|
|
|
|
[]>;
|
|
|
|
def SULD_2D_V4I16_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
|
|
|
|
"suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
|
|
|
|
[]>;
|
|
|
|
def SULD_2D_V4I32_TRAP
|
|
|
|
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
|
|
|
|
"suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
|
|
|
|
[]>;
|
|
|
|
|
|
|
|
def SULD_2D_ARRAY_V4I8_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
|
|
|
|
"suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
|
|
|
|
"[$s, \\{$l, $x, $y, $y\\}];",
|
|
|
|
[]>;
|
|
|
|
def SULD_2D_ARRAY_V4I16_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
|
|
|
|
"suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
|
|
|
|
"[$s, \\{$l, $x, $y, $y\\}];",
|
|
|
|
[]>;
|
|
|
|
def SULD_2D_ARRAY_V4I32_TRAP
|
|
|
|
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
|
|
|
|
"suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
|
|
|
|
"[$s, \\{$l, $x, $y, $y\\}];",
|
|
|
|
[]>;
|
|
|
|
|
|
|
|
|
|
|
|
def SULD_3D_V4I8_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
|
|
|
|
"suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
|
|
|
|
"[$s, \\{$x, $y, $z, $z\\}];",
|
|
|
|
[]>;
|
|
|
|
def SULD_3D_V4I16_TRAP
|
|
|
|
: NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
|
|
|
|
"suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
|
|
|
|
"[$s, \\{$x, $y, $z, $z\\}];",
|
|
|
|
[]>;
|
|
|
|
// suld.b.3d.v4.b32.trap: four 32-bit-class results read from $s. The
// coordinate vector is {$x, $y, $z, $z}; $z is repeated as the fourth element.
// No selection pattern is attached.
def SULD_3D_V4I32_TRAP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$x, $y, $z, $z\\}];",
    []>;
|
|
|
|
}
|
|
|
|
|
|
|
|
// .zero variant
|
|
|
|
// Single-result suld.b forms carrying the .zero modifier. Every record in
// this scope is tagged IsSuld = 1 (the consumer of that field is not visible
// in this part of the file). All pattern lists are empty, so none of these
// records carries its own selection pattern. The .b8 forms declare their
// result in the 16-bit register class.
let IsSuld = 1 in {
  // 1D: coordinate vector {$x}.
  def SULD_1D_I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
      []>;

  // 1D array: coordinate vector {$l, $x}.
  def SULD_1D_ARRAY_I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;

  // 2D: coordinate vector {$x, $y}.
  def SULD_2D_I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;

  // 2D array: coordinate vector {$l, $x, $y, $y}; $y fills the fourth slot.
  def SULD_2D_ARRAY_I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;

  // 3D: coordinate vector {$x, $y, $z, $z}; $z fills the fourth slot.
  def SULD_3D_I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
}
|
|
|
|
|
|
|
|
// Two-result (.v2) suld.b forms carrying the .zero modifier. Every record in
// this scope is tagged IsSuld = 2 (the consumer of that field is not visible
// in this part of the file). All pattern lists are empty. The .b8 forms
// declare their results in the 16-bit register class.
let IsSuld = 2 in {
  // 1D: coordinate vector {$x}.
  def SULD_1D_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;

  // 1D array: coordinate vector {$l, $x}.
  def SULD_1D_ARRAY_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;

  // 2D: coordinate vector {$x, $y}.
  def SULD_2D_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;

  // 2D array: coordinate vector {$l, $x, $y, $y}; $y fills the fourth slot.
  def SULD_2D_ARRAY_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;

  // 3D: coordinate vector {$x, $y, $z, $z}; $z fills the fourth slot.
  def SULD_3D_V2I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V2I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V2I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V2I64_ZERO : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
}
|
|
|
|
|
|
|
|
// Four-result (.v4) suld.b forms carrying the .zero modifier. Every record in
// this scope is tagged IsSuld = 3 (the consumer of that field is not visible
// in this part of the file). All pattern lists are empty. Only b8/b16/b32
// element widths are defined here; the .b8 forms declare their results in the
// 16-bit register class.
let IsSuld = 3 in {
  // 1D: coordinate vector {$x}.
  def SULD_1D_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
      []>;

  // 1D array: coordinate vector {$l, $x}.
  def SULD_1D_ARRAY_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
      "[$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
      "[$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
      "[$s, \\{$l, $x\\}];",
      []>;

  // 2D: coordinate vector {$x, $y}.
  def SULD_2D_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
      []>;

  // 2D array: coordinate vector {$l, $x, $y, $y}; $y fills the fourth slot.
  def SULD_2D_ARRAY_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;

  // 3D: coordinate vector {$x, $y, $z, $z}; $z fills the fourth slot.
  def SULD_3D_V4I8_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
      "[$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V4I16_ZERO : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
      "[$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V4I32_ZERO : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
      "[$s, \\{$x, $y, $z, $z\\}];",
      []>;
}
|
|
|
|
|
|
|
|
//-----------------------------------
|
|
|
|
// Texture Query Intrinsics
|
|
|
|
//-----------------------------------
|
2014-07-17 22:51:33 +08:00
|
|
|
|
|
|
|
// txq.* query instructions. Each takes one 64-bit register operand $a, used
// as "[$a]" in the asm string, and produces a 32-bit result in $d. Every
// record is tagged IsSurfTexQuery = 1. The pattern lists are empty; the
// intrinsic-to-instruction mapping is expressed by separate Pat records.
let IsSurfTexQuery = 1 in {
  def TXQ_CHANNEL_ORDER : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.channel_order.b32 \t$d, [$a];",
      []>;
  def TXQ_CHANNEL_DATA_TYPE : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.channel_data_type.b32 \t$d, [$a];",
      []>;
  def TXQ_WIDTH : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.width.b32 \t$d, [$a];",
      []>;
  def TXQ_HEIGHT : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.height.b32 \t$d, [$a];",
      []>;
  def TXQ_DEPTH : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.depth.b32 \t$d, [$a];",
      []>;
  def TXQ_ARRAY_SIZE : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.array_size.b32 \t$d, [$a];",
      []>;
  def TXQ_NUM_SAMPLES : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.num_samples.b32 \t$d, [$a];",
      []>;
  def TXQ_NUM_MIPMAP_LEVELS : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "txq.num_mipmap_levels.b32 \t$d, [$a];",
      []>;
}
|
2014-07-17 19:59:04 +08:00
|
|
|
|
|
|
|
// Selection patterns for the texture queries: each int_nvvm_txq_* intrinsic
// applied to a 64-bit register operand selects the TXQ_* instruction with the
// matching name, forwarding the operand unchanged.
def : Pat<
    (int_nvvm_txq_channel_order Int64Regs:$a),
    (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_channel_data_type Int64Regs:$a),
    (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_width Int64Regs:$a),
    (TXQ_WIDTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_height Int64Regs:$a),
    (TXQ_HEIGHT Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_depth Int64Regs:$a),
    (TXQ_DEPTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_array_size Int64Regs:$a),
    (TXQ_ARRAY_SIZE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_num_samples Int64Regs:$a),
    (TXQ_NUM_SAMPLES Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
    (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------
|
|
|
|
// Surface Query Intrinsics
|
|
|
|
//-----------------------------------
|
2014-07-17 22:51:33 +08:00
|
|
|
|
|
|
|
// suq.* query instructions. Each takes one 64-bit register operand $a, used
// as "[$a]" in the asm string, and produces a 32-bit result in $d. Every
// record is tagged IsSurfTexQuery = 1. The pattern lists are empty; the
// intrinsic-to-instruction mapping is expressed by separate Pat records.
let IsSurfTexQuery = 1 in {
  def SUQ_CHANNEL_ORDER : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.channel_order.b32 \t$d, [$a];",
      []>;
  def SUQ_CHANNEL_DATA_TYPE : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.channel_data_type.b32 \t$d, [$a];",
      []>;
  def SUQ_WIDTH : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.width.b32 \t$d, [$a];",
      []>;
  def SUQ_HEIGHT : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.height.b32 \t$d, [$a];",
      []>;
  def SUQ_DEPTH : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.depth.b32 \t$d, [$a];",
      []>;
  def SUQ_ARRAY_SIZE : NVPTXInst<
      (outs Int32Regs:$d),
      (ins Int64Regs:$a),
      "suq.array_size.b32 \t$d, [$a];",
      []>;
}
|
2014-07-17 19:59:04 +08:00
|
|
|
|
|
|
|
// Selection patterns for the surface queries: each int_nvvm_suq_* intrinsic
// applied to a 64-bit register operand selects the SUQ_* instruction with the
// matching name, forwarding the operand unchanged.
def : Pat<
    (int_nvvm_suq_channel_order Int64Regs:$a),
    (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_channel_data_type Int64Regs:$a),
    (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_width Int64Regs:$a),
    (SUQ_WIDTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_height Int64Regs:$a),
    (SUQ_HEIGHT Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_depth Int64Regs:$a),
    (SUQ_DEPTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_array_size Int64Regs:$a),
    (SUQ_ARRAY_SIZE Int64Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
//===- Handle Query -------------------------------------------------------===//
|
|
|
|
|
|
|
|
// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
|
|
|
|
// istypep.samplerref: writes a 1-bit result $d for the 64-bit operand $a.
// Unlike the query records above, the selection pattern is inline: it matches
// int_nvvm_istypep_sampler directly.
def ISTYPEP_SAMPLER : NVPTXInst<
    (outs Int1Regs:$d),
    (ins Int64Regs:$a),
    "istypep.samplerref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
|
|
|
|
// istypep.surfref: writes a 1-bit result $d for the 64-bit operand $a.
// The selection pattern is inline: it matches int_nvvm_istypep_surface
// directly.
def ISTYPEP_SURFACE : NVPTXInst<
    (outs Int1Regs:$d),
    (ins Int64Regs:$a),
    "istypep.surfref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
|
|
|
|
// istypep.texref: writes a 1-bit result $d for the 64-bit operand $a.
// The selection pattern is inline: it matches int_nvvm_istypep_texture
// directly.
def ISTYPEP_TEXTURE : NVPTXInst<
    (outs Int1Regs:$d),
    (ins Int64Regs:$a),
    "istypep.texref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
|
|
|
|
|
|
|
|
//===- Surface Stores -----------------------------------------------------===//
|
|
|
|
|
|
|
|
let IsSust = 1 in {
|
|
|
|
// Unformatted
|
|
|
|
// .clamp variant
|
|
|
|
// sust.b.1d.b8.clamp: stores $r (16-bit register class) to $s at {$x}.
// No results; empty pattern list.
def SUST_B_1D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.1d.b16.clamp: stores $r (16-bit register class) to $s at {$x}.
// No results; empty pattern list.
def SUST_B_1D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.1d.b32.clamp: stores $r (32-bit register class) to $s at {$x}.
// No results; empty pattern list.
def SUST_B_1D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.1d.b64.clamp: stores $r (64-bit register class) to $s at {$x}.
// No results; empty pattern list.
def SUST_B_1D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.1d.v2.b8.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$x}. No results; empty pattern list.
def SUST_B_1D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.1d.v2.b16.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$x}. No results; empty pattern list.
def SUST_B_1D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.1d.v2.b32.clamp: stores {$r, $g} (32-bit register class) to $s at
// {$x}. No results; empty pattern list.
def SUST_B_1D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.1d.v2.b64.clamp: stores {$r, $g} (64-bit register class) to $s at
// {$x}. No results; empty pattern list.
def SUST_B_1D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.1d.v4.b8.clamp: stores {$r, $g, $b, $a} (16-bit register class) to
// $s at {$x}. No results; empty pattern list.
def SUST_B_1D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// sust.b.1d.v4.b16.clamp: stores {$r, $g, $b, $a} (16-bit register class) to
// $s at {$x}. No results; empty pattern list.
def SUST_B_1D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// sust.b.1d.v4.b32.clamp: stores {$r, $g, $b, $a} (32-bit register class) to
// $s at {$x}. No results; empty pattern list.
def SUST_B_1D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
|
|
|
|
|
|
|
|
// sust.b.a1d.b8.clamp: stores $r (16-bit register class) to $s at
// {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.a1d.b16.clamp: stores $r (16-bit register class) to $s at
// {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.a1d.b32.clamp: stores $r (32-bit register class) to $s at
// {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.a1d.b64.clamp: stores $r (64-bit register class) to $s at
// {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.a1d.v2.b8.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.a1d.v2.b16.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.a1d.v2.b32.clamp: stores {$r, $g} (32-bit register class) to $s at
// {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.a1d.v2.b64.clamp: stores {$r, $g} (64-bit register class) to $s at
// {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.a1d.v4.b8.clamp: stores {$r, $g, $b, $a} (16-bit register class) to
// $s at {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// sust.b.a1d.v4.b16.clamp: stores {$r, $g, $b, $a} (16-bit register class)
// to $s at {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// sust.b.a1d.v4.b32.clamp: stores {$r, $g, $b, $a} (32-bit register class)
// to $s at {$idx, $x}. No results; empty pattern list.
def SUST_B_1D_ARRAY_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
|
|
|
|
|
|
|
|
// sust.b.2d.b8.clamp: stores $r (16-bit register class) to $s at {$x, $y}.
// No results; empty pattern list.
def SUST_B_2D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.2d.b16.clamp: stores $r (16-bit register class) to $s at {$x, $y}.
// No results; empty pattern list.
def SUST_B_2D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.2d.b32.clamp: stores $r (32-bit register class) to $s at {$x, $y}.
// No results; empty pattern list.
def SUST_B_2D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.2d.b64.clamp: stores $r (64-bit register class) to $s at {$x, $y}.
// No results; empty pattern list.
def SUST_B_2D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.2d.v2.b8.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$x, $y}. No results; empty pattern list.
def SUST_B_2D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.2d.v2.b16.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$x, $y}. No results; empty pattern list.
def SUST_B_2D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.2d.v2.b32.clamp: stores {$r, $g} (32-bit register class) to $s at
// {$x, $y}. No results; empty pattern list.
def SUST_B_2D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.2d.v2.b64.clamp: stores {$r, $g} (64-bit register class) to $s at
// {$x, $y}. No results; empty pattern list.
def SUST_B_2D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.2d.v4.b8.clamp: stores {$r, $g, $b, $a} (16-bit register class) to
// $s at {$x, $y}. No results; empty pattern list.
def SUST_B_2D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// sust.b.2d.v4.b16.clamp: stores {$r, $g, $b, $a} (16-bit register class) to
// $s at {$x, $y}. No results; empty pattern list.
def SUST_B_2D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// sust.b.2d.v4.b32.clamp: stores {$r, $g, $b, $a} (32-bit register class) to
// $s at {$x, $y}. No results; empty pattern list.
def SUST_B_2D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
|
|
|
|
|
|
|
|
// sust.b.a2d.b8.clamp: stores $r (16-bit register class) to $s at
// {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_2D_ARRAY_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.a2d.b16.clamp: stores $r (16-bit register class) to $s at
// {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_2D_ARRAY_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.a2d.b32.clamp: stores $r (32-bit register class) to $s at
// {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_2D_ARRAY_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.a2d.b64.clamp: stores $r (64-bit register class) to $s at
// {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_2D_ARRAY_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.a2d.v2.b8.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_2D_ARRAY_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.a2d.v2.b16.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_2D_ARRAY_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.a2d.v2.b32.clamp: stores {$r, $g} (32-bit register class) to $s at
// {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_2D_ARRAY_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.a2d.v2.b64.clamp: stores {$r, $g} (64-bit register class) to $s at
// {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_2D_ARRAY_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.a2d.v4.b8.clamp: stores {$r, $g, $b, $a} (16-bit register class) to
// $s at {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_2D_ARRAY_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// sust.b.a2d.v4.b16.clamp: stores {$r, $g, $b, $a} (16-bit register class)
// to $s at {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate
// element. No results; empty pattern list.
def SUST_B_2D_ARRAY_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// sust.b.a2d.v4.b32.clamp: stores {$r, $g, $b, $a} (32-bit register class)
// to $s at {$idx, $x, $y, $y}; $y is repeated as the fourth coordinate
// element. No results; empty pattern list.
def SUST_B_2D_ARRAY_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
|
|
|
|
|
|
|
|
// sust.b.3d.b8.clamp: stores $r (16-bit register class) to $s at
// {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.3d.b16.clamp: stores $r (16-bit register class) to $s at
// {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.3d.b32.clamp: stores $r (32-bit register class) to $s at
// {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.3d.b64.clamp: stores $r (64-bit register class) to $s at
// {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r),
    "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
|
|
|
|
// sust.b.3d.v2.b8.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.3d.v2.b16.clamp: stores {$r, $g} (16-bit register class) to $s at
// {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.3d.v2.b32.clamp: stores {$r, $g} (32-bit register class) to $s at
// {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.3d.v2.b64.clamp: stores {$r, $g} (64-bit register class) to $s at
// {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
|
|
|
|
// sust.b.3d.v4.b8.clamp: stores {$r, $g, $b, $a} (16-bit register class) to
// $s at {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// sust.b.3d.v4.b16.clamp: stores {$r, $g, $b, $a} (16-bit register class) to
// $s at {$x, $y, $z, $z}; $z is repeated as the fourth coordinate element.
// No results; empty pattern list.
def SUST_B_3D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.v4.b32.clamp; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); values $r, $g, $b, $a are Int32Regs.
def SUST_B_3D_V4B32_CLAMP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.3d.v4.b32.clamp \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
|
|
|
|
|
|
|
|
// .trap variant
|
|
|
|
// Pattern-less instruction printing sust.b.1d.b8.trap; coordinate {$x};
// value $r is Int16Regs.
def SUST_B_1D_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r),
              "sust.b.1d.b8.trap \t"
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.b16.trap; coordinate {$x};
// value $r is Int16Regs.
def SUST_B_1D_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r),
              "sust.b.1d.b16.trap \t"
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.b32.trap; coordinate {$x};
// value $r is Int32Regs.
def SUST_B_1D_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int32Regs:$r),
              "sust.b.1d.b32.trap \t"
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.b64.trap; coordinate {$x};
// value $r is Int64Regs.
def SUST_B_1D_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int64Regs:$r),
              "sust.b.1d.b64.trap \t"
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.1d.v2.b8.trap; coordinate {$x};
// values $r, $g are Int16Regs.
def SUST_B_1D_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.1d.v2.b8.trap \t"
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.1d.v2.b16.trap; coordinate {$x};
// values $r, $g are Int16Regs.
def SUST_B_1D_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.1d.v2.b16.trap \t"
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.1d.v2.b32.trap; coordinate {$x};
// values $r, $g are Int32Regs.
def SUST_B_1D_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.1d.v2.b32.trap \t"
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.1d.v2.b64.trap; coordinate {$x};
// values $r, $g are Int64Regs.
def SUST_B_1D_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.1d.v2.b64.trap \t"
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.1d.v4.b8.trap; coordinate {$x};
// values $r, $g, $b, $a are Int16Regs.
def SUST_B_1D_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.1d.v4.b8.trap \t"
              "[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.1d.v4.b16.trap; coordinate {$x};
// values $r, $g, $b, $a are Int16Regs.
def SUST_B_1D_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.1d.v4.b16.trap \t"
              "[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.1d.v4.b32.trap; coordinate {$x};
// values $r, $g, $b, $a are Int32Regs.
def SUST_B_1D_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.1d.v4.b32.trap \t"
              "[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
|
|
|
|
// Pattern-less instruction printing sust.b.a1d.b8.trap; coordinates
// {$idx, $x}; value $r is Int16Regs.
def SUST_B_1D_ARRAY_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r),
              "sust.b.a1d.b8.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.b16.trap; coordinates
// {$idx, $x}; value $r is Int16Regs.
def SUST_B_1D_ARRAY_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r),
              "sust.b.a1d.b16.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.b32.trap; coordinates
// {$idx, $x}; value $r is Int32Regs.
def SUST_B_1D_ARRAY_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int32Regs:$r),
              "sust.b.a1d.b32.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.b64.trap; coordinates
// {$idx, $x}; value $r is Int64Regs.
def SUST_B_1D_ARRAY_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int64Regs:$r),
              "sust.b.a1d.b64.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v2.b8.trap; coordinates
// {$idx, $x}; values $r, $g are Int16Regs.
def SUST_B_1D_ARRAY_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a1d.v2.b8.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v2.b16.trap; coordinates
// {$idx, $x}; values $r, $g are Int16Regs.
def SUST_B_1D_ARRAY_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a1d.v2.b16.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v2.b32.trap; coordinates
// {$idx, $x}; values $r, $g are Int32Regs.
def SUST_B_1D_ARRAY_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.a1d.v2.b32.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v2.b64.trap; coordinates
// {$idx, $x}; values $r, $g are Int64Regs.
def SUST_B_1D_ARRAY_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.a1d.v2.b64.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v4.b8.trap; coordinates
// {$idx, $x}; values $r, $g, $b, $a are Int16Regs.
def SUST_B_1D_ARRAY_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a1d.v4.b8.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.a1d.v4.b16.trap; coordinates
// {$idx, $x}; values $r, $g, $b, $a are Int16Regs.
def SUST_B_1D_ARRAY_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a1d.v4.b16.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.a1d.v4.b32.trap; coordinates
// {$idx, $x}; values $r, $g, $b, $a are Int32Regs.
def SUST_B_1D_ARRAY_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.a1d.v4.b32.trap \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
|
|
|
|
// Pattern-less instruction printing sust.b.2d.b8.trap; coordinates {$x, $y};
// value $r is Int16Regs.
def SUST_B_2D_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.2d.b8.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.b16.trap; coordinates {$x, $y};
// value $r is Int16Regs.
def SUST_B_2D_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.2d.b16.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.b32.trap; coordinates {$x, $y};
// value $r is Int32Regs.
def SUST_B_2D_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r),
              "sust.b.2d.b32.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.b64.trap; coordinates {$x, $y};
// value $r is Int64Regs.
def SUST_B_2D_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r),
              "sust.b.2d.b64.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.v2.b8.trap; coordinates {$x, $y};
// values $r, $g are Int16Regs.
def SUST_B_2D_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.2d.v2.b8.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.v2.b16.trap; coordinates {$x, $y};
// values $r, $g are Int16Regs.
def SUST_B_2D_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.2d.v2.b16.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.v2.b32.trap; coordinates {$x, $y};
// values $r, $g are Int32Regs.
def SUST_B_2D_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.2d.v2.b32.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.v2.b64.trap; coordinates {$x, $y};
// values $r, $g are Int64Regs.
def SUST_B_2D_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.2d.v2.b64.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.v4.b8.trap; coordinates {$x, $y};
// values $r, $g, $b, $a are Int16Regs.
def SUST_B_2D_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.2d.v4.b8.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.v4.b16.trap; coordinates {$x, $y};
// values $r, $g, $b, $a are Int16Regs.
def SUST_B_2D_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.2d.v4.b16.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.v4.b32.trap; coordinates {$x, $y};
// values $r, $g, $b, $a are Int32Regs.
def SUST_B_2D_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.2d.v4.b32.trap \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
|
|
|
|
|
|
|
|
// Pattern-less instruction printing sust.b.a2d.b8.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); value $r is Int16Regs.
def SUST_B_2D_ARRAY_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.a2d.b8.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.a2d.b16.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); value $r is Int16Regs.
def SUST_B_2D_ARRAY_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.a2d.b16.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.a2d.b32.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); value $r is Int32Regs.
def SUST_B_2D_ARRAY_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r),
              "sust.b.a2d.b32.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.b64.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); value $r is Int64Regs.
def SUST_B_2D_ARRAY_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r),
              "sust.b.a2d.b64.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v2.b8.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g are Int16Regs.
def SUST_B_2D_ARRAY_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a2d.v2.b8.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v2.b16.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g are Int16Regs.
def SUST_B_2D_ARRAY_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a2d.v2.b16.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v2.b32.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g are Int32Regs.
def SUST_B_2D_ARRAY_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.a2d.v2.b32.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v2.b64.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g are Int64Regs.
def SUST_B_2D_ARRAY_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.a2d.v2.b64.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v4.b8.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g, $b, $a are Int16Regs.
def SUST_B_2D_ARRAY_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a2d.v4.b8.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v4.b16.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g, $b, $a are Int16Regs.
def SUST_B_2D_ARRAY_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a2d.v4.b16.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v4.b32.trap; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g, $b, $a are Int32Regs.
def SUST_B_2D_ARRAY_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.a2d.v4.b32.trap \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.b8.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); value $r is Int16Regs.
def SUST_B_3D_B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r),
              "sust.b.3d.b8.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.b16.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); value $r is Int16Regs.
def SUST_B_3D_B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r),
              "sust.b.3d.b16.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.b32.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); value $r is Int32Regs.
def SUST_B_3D_B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r),
              "sust.b.3d.b32.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.b64.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); value $r is Int64Regs.
def SUST_B_3D_B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int64Regs:$r),
              "sust.b.3d.b64.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.v2.b8.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); values $r, $g are Int16Regs.
def SUST_B_3D_V2B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.3d.v2.b8.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.v2.b16.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); values $r, $g are Int16Regs.
def SUST_B_3D_V2B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.3d.v2.b16.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.v2.b32.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); values $r, $g are Int32Regs.
def SUST_B_3D_V2B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.3d.v2.b32.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.v2.b64.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); values $r, $g are Int64Regs.
def SUST_B_3D_V2B64_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.3d.v2.b64.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.v4.b8.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); values $r, $g, $b, $a are Int16Regs.
def SUST_B_3D_V4B8_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.3d.v4.b8.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.v4.b16.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); values $r, $g, $b, $a are Int16Regs.
def SUST_B_3D_V4B16_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.3d.v4.b16.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.v4.b32.trap; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); values $r, $g, $b, $a are Int32Regs.
def SUST_B_3D_V4B32_TRAP
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.3d.v4.b32.trap \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// .zero variant
|
|
|
|
// Pattern-less instruction printing sust.b.1d.b8.zero; coordinate {$x};
// value $r is Int16Regs.
def SUST_B_1D_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r),
              "sust.b.1d.b8.zero \t"
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.b16.zero; coordinate {$x};
// value $r is Int16Regs.
def SUST_B_1D_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r),
              "sust.b.1d.b16.zero \t"
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.b32.zero; coordinate {$x};
// value $r is Int32Regs.
def SUST_B_1D_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int32Regs:$r),
              "sust.b.1d.b32.zero \t"
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.b64.zero; coordinate {$x};
// value $r is Int64Regs.
def SUST_B_1D_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int64Regs:$r),
              "sust.b.1d.b64.zero \t"
              "[$s, \\{$x\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.1d.v2.b8.zero; coordinate {$x};
// values $r, $g are Int16Regs.
def SUST_B_1D_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.1d.v2.b8.zero \t"
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.v2.b16.zero; coordinate {$x};
// values $r, $g are Int16Regs.
def SUST_B_1D_V2B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.1d.v2.b16.zero \t"
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.v2.b32.zero; coordinate {$x};
// values $r, $g are Int32Regs.
def SUST_B_1D_V2B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.1d.v2.b32.zero \t"
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.v2.b64.zero; coordinate {$x};
// values $r, $g are Int64Regs.
def SUST_B_1D_V2B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.1d.v2.b64.zero \t"
              "[$s, \\{$x\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.1d.v4.b8.zero; coordinate {$x};
// values $r, $g, $b, $a are Int16Regs.
def SUST_B_1D_V4B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.1d.v4.b8.zero \t"
              "[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.v4.b16.zero; coordinate {$x};
// values $r, $g, $b, $a are Int16Regs.
def SUST_B_1D_V4B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.1d.v4.b16.zero \t"
              "[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.1d.v4.b32.zero; coordinate {$x};
// values $r, $g, $b, $a are Int32Regs.
def SUST_B_1D_V4B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.1d.v4.b32.zero \t"
              "[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.b8.zero; coordinates
// {$idx, $x}; value $r is Int16Regs.
def SUST_B_1D_ARRAY_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r),
              "sust.b.a1d.b8.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.b16.zero; coordinates
// {$idx, $x}; value $r is Int16Regs.
def SUST_B_1D_ARRAY_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r),
              "sust.b.a1d.b16.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.b32.zero; coordinates
// {$idx, $x}; value $r is Int32Regs.
def SUST_B_1D_ARRAY_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int32Regs:$r),
              "sust.b.a1d.b32.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.b64.zero; coordinates
// {$idx, $x}; value $r is Int64Regs.
def SUST_B_1D_ARRAY_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int64Regs:$r),
              "sust.b.a1d.b64.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.a1d.v2.b8.zero; coordinates
// {$idx, $x}; values $r, $g are Int16Regs.
def SUST_B_1D_ARRAY_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a1d.v2.b8.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v2.b16.zero; coordinates
// {$idx, $x}; values $r, $g are Int16Regs.
def SUST_B_1D_ARRAY_V2B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a1d.v2.b16.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v2.b32.zero; coordinates
// {$idx, $x}; values $r, $g are Int32Regs.
def SUST_B_1D_ARRAY_V2B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.a1d.v2.b32.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v2.b64.zero; coordinates
// {$idx, $x}; values $r, $g are Int64Regs.
def SUST_B_1D_ARRAY_V2B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.a1d.v2.b64.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.a1d.v4.b8.zero; coordinates
// {$idx, $x}; values $r, $g, $b, $a are Int16Regs.
def SUST_B_1D_ARRAY_V4B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a1d.v4.b8.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v4.b16.zero; coordinates
// {$idx, $x}; values $r, $g, $b, $a are Int16Regs.
def SUST_B_1D_ARRAY_V4B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a1d.v4.b16.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a1d.v4.b32.zero; coordinates
// {$idx, $x}; values $r, $g, $b, $a are Int32Regs.
def SUST_B_1D_ARRAY_V4B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.a1d.v4.b32.zero \t"
              "[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.2d.b8.zero; coordinates {$x, $y};
// value $r is Int16Regs.
def SUST_B_2D_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.2d.b8.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.2d.b16.zero; coordinates {$x, $y};
// value $r is Int16Regs.
def SUST_B_2D_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.2d.b16.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.2d.b32.zero; coordinates {$x, $y};
// value $r is Int32Regs.
def SUST_B_2D_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r),
              "sust.b.2d.b32.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.2d.b64.zero; coordinates {$x, $y};
// value $r is Int64Regs.
def SUST_B_2D_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r),
              "sust.b.2d.b64.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.v2.b8.zero; coordinates {$x, $y};
// values $r, $g are Int16Regs.
def SUST_B_2D_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.2d.v2.b8.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.2d.v2.b16.zero; coordinates {$x, $y};
// values $r, $g are Int16Regs.
def SUST_B_2D_V2B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.2d.v2.b16.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.2d.v2.b32.zero; coordinates {$x, $y};
// values $r, $g are Int32Regs.
def SUST_B_2D_V2B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.2d.v2.b32.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.2d.v2.b64.zero; coordinates {$x, $y};
// values $r, $g are Int64Regs.
def SUST_B_2D_V2B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.2d.v2.b64.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.2d.v4.b8.zero; coordinates {$x, $y};
// values $r, $g, $b, $a are Int16Regs.
def SUST_B_2D_V4B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.2d.v4.b8.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.2d.v4.b16.zero; coordinates {$x, $y};
// values $r, $g, $b, $a are Int16Regs.
def SUST_B_2D_V4B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.2d.v4.b16.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.2d.v4.b32.zero; coordinates {$x, $y};
// values $r, $g, $b, $a are Int32Regs.
def SUST_B_2D_V4B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.2d.v4.b32.zero \t"
              "[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.b8.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); value $r is Int16Regs.
def SUST_B_2D_ARRAY_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.a2d.b8.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.b16.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); value $r is Int16Regs.
def SUST_B_2D_ARRAY_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r),
              "sust.b.a2d.b16.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.b32.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); value $r is Int32Regs.
def SUST_B_2D_ARRAY_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r),
              "sust.b.a2d.b32.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.b64.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); value $r is Int64Regs.
def SUST_B_2D_ARRAY_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r),
              "sust.b.a2d.b64.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.a2d.v2.b8.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g are Int16Regs.
def SUST_B_2D_ARRAY_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a2d.v2.b8.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v2.b16.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g are Int16Regs.
def SUST_B_2D_ARRAY_V2B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.a2d.v2.b16.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v2.b32.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g are Int32Regs.
def SUST_B_2D_ARRAY_V2B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g),
              "sust.b.a2d.v2.b32.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v2.b64.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g are Int64Regs.
def SUST_B_2D_ARRAY_V2B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int64Regs:$r, Int64Regs:$g),
              "sust.b.a2d.v2.b64.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.a2d.v4.b8.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g, $b, $a are Int16Regs.
def SUST_B_2D_ARRAY_V4B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a2d.v4.b8.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v4.b16.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g, $b, $a are Int16Regs.
def SUST_B_2D_ARRAY_V4B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
              "sust.b.a2d.v4.b16.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.a2d.v4.b32.zero; coordinates
// {$idx, $x, $y, $y} ($y repeated in the fourth slot); values $r, $g, $b, $a are Int32Regs.
def SUST_B_2D_ARRAY_V4B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
              "sust.b.a2d.v4.b32.zero \t"
              "[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
              []>;
|
|
|
|
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.b8.zero; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); value $r is Int16Regs.
def SUST_B_3D_B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r),
              "sust.b.3d.b8.zero \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.b16.zero; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); value $r is Int16Regs.
def SUST_B_3D_B16_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r),
              "sust.b.3d.b16.zero \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.b32.zero; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); value $r is Int32Regs.
def SUST_B_3D_B32_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int32Regs:$r),
              "sust.b.3d.b32.zero \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Pattern-less instruction printing sust.b.3d.b64.zero; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); value $r is Int64Regs.
def SUST_B_3D_B64_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int64Regs:$r),
              "sust.b.3d.b64.zero \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
              []>;
|
|
|
|
// Pattern-less instruction printing sust.b.3d.v2.b8.zero; coordinates
// {$x, $y, $z, $z} ($z repeated in the fourth slot); values $r, $g are Int16Regs.
def SUST_B_3D_V2B8_ZERO
  : NVPTXInst<(outs),
              (ins Int64Regs:$s,
                   Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                   Int16Regs:$r, Int16Regs:$g),
              "sust.b.3d.v2.b8.zero \t"
              "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
              []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Surface store "sust.b.3d.v2.b16.zero": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g}. Pattern list is empty here.
def SUST_B_3D_V2B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Surface store "sust.b.3d.v2.b32.zero": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g}. Pattern list is empty here.
def SUST_B_3D_V2B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Surface store "sust.b.3d.v2.b64.zero": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g}. Pattern list is empty here.
def SUST_B_3D_V2B64_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    !strconcat("sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
|
|
|
|
// Surface store "sust.b.3d.v4.b8.zero": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_B_3D_V4B8_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Surface store "sust.b.3d.v4.b16.zero": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_B_3D_V4B16_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
// Surface store "sust.b.3d.v4.b32.zero": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_B_3D_V4B32_ZERO : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
|
|
|
|
|
2014-04-09 23:39:15 +08:00
|
|
|
// Formatted surface stores (sust.p.*)
|
|
|
|
|
|
|
|
// Surface store "sust.p.1d.b8.trap": address [$s, {$x}], data {$r}.
// Pattern list is empty here.
def SUST_P_1D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.1d.b16.trap": address [$s, {$x}], data {$r}.
// Pattern list is empty here.
def SUST_P_1D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.1d.b32.trap": address [$s, {$x}], data {$r}.
// Pattern list is empty here.
def SUST_P_1D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.1d.v2.b8.trap": address [$s, {$x}], data {$r, $g}.
// Pattern list is empty here.
def SUST_P_1D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// Surface store "sust.p.1d.v2.b16.trap": address [$s, {$x}], data {$r, $g}.
// Pattern list is empty here.
def SUST_P_1D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// Surface store "sust.p.1d.v2.b32.trap": address [$s, {$x}], data {$r, $g}.
// Pattern list is empty here.
def SUST_P_1D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// Surface store "sust.p.1d.v4.b8.trap": address [$s, {$x}],
// data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_1D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// Surface store "sust.p.1d.v4.b16.trap": address [$s, {$x}],
// data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_1D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
// Surface store "sust.p.1d.v4.b32.trap": address [$s, {$x}],
// data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_1D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
|
|
|
|
|
|
|
|
|
|
|
|
// Surface store "sust.p.a1d.b8.trap": address [$s, {$idx, $x}], data {$r}.
// Pattern list is empty here.
def SUST_P_1D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.a1d.b16.trap": address [$s, {$idx, $x}], data {$r}.
// Pattern list is empty here.
def SUST_P_1D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.a1d.b32.trap": address [$s, {$idx, $x}], data {$r}.
// Pattern list is empty here.
def SUST_P_1D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.a1d.v2.b8.trap": address [$s, {$idx, $x}],
// data {$r, $g}. Pattern list is empty here.
def SUST_P_1D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// Surface store "sust.p.a1d.v2.b16.trap": address [$s, {$idx, $x}],
// data {$r, $g}. Pattern list is empty here.
def SUST_P_1D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// Surface store "sust.p.a1d.v2.b32.trap": address [$s, {$idx, $x}],
// data {$r, $g}. Pattern list is empty here.
def SUST_P_1D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// Surface store "sust.p.a1d.v4.b8.trap": address [$s, {$idx, $x}],
// data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.a1d.v4.b16.trap": address [$s, {$idx, $x}],
// data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.a1d.v4.b32.trap": address [$s, {$idx, $x}],
// data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
|
|
|
|
|
|
|
|
// Surface store "sust.p.2d.b8.trap": address [$s, {$x, $y}], data {$r}.
// Pattern list is empty here.
def SUST_P_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.2d.b16.trap": address [$s, {$x, $y}], data {$r}.
// Pattern list is empty here.
def SUST_P_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.2d.b32.trap": address [$s, {$x, $y}], data {$r}.
// Pattern list is empty here.
def SUST_P_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.2d.v2.b8.trap": address [$s, {$x, $y}],
// data {$r, $g}. Pattern list is empty here.
def SUST_P_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// Surface store "sust.p.2d.v2.b16.trap": address [$s, {$x, $y}],
// data {$r, $g}. Pattern list is empty here.
def SUST_P_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// Surface store "sust.p.2d.v2.b32.trap": address [$s, {$x, $y}],
// data {$r, $g}. Pattern list is empty here.
def SUST_P_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
|
|
|
|
// Surface store "sust.p.2d.v4.b8.trap": address [$s, {$x, $y}],
// data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.2d.v4.b16.trap": address [$s, {$x, $y}],
// data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_2D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.2d.v4.b32.trap": address [$s, {$x, $y}],
// data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_2D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
|
|
|
|
|
|
|
|
// Surface store "sust.p.a2d.b8.trap": address [$s, {$idx, $x, $y, $y}]
// ($y repeated), data {$r}. Pattern list is empty here.
def SUST_P_2D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.a2d.b16.trap": address [$s, {$idx, $x, $y, $y}]
// ($y repeated), data {$r}. Pattern list is empty here.
def SUST_P_2D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.a2d.b32.trap": address [$s, {$idx, $x, $y, $y}]
// ($y repeated), data {$r}. Pattern list is empty here.
def SUST_P_2D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.a2d.v2.b8.trap": address [$s, {$idx, $x, $y, $y}]
// ($y repeated), data {$r, $g}. Pattern list is empty here.
def SUST_P_2D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.a2d.v2.b16.trap": address [$s, {$idx, $x, $y, $y}]
// ($y repeated), data {$r, $g}. Pattern list is empty here.
def SUST_P_2D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.a2d.v2.b32.trap": address [$s, {$idx, $x, $y, $y}]
// ($y repeated), data {$r, $g}. Pattern list is empty here.
def SUST_P_2D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.a2d.v4.b8.trap": address [$s, {$idx, $x, $y, $y}]
// ($y repeated), data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_2D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.a2d.v4.b16.trap": address [$s, {$idx, $x, $y, $y}]
// ($y repeated), data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_2D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.a2d.v4.b32.trap": address [$s, {$idx, $x, $y, $y}]
// ($y repeated), data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_2D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
|
|
|
|
|
|
|
|
// Surface store "sust.p.3d.b8.trap": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r}. Pattern list is empty here.
def SUST_P_3D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.3d.b16.trap": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r}. Pattern list is empty here.
def SUST_P_3D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.3d.b32.trap": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r}. Pattern list is empty here.
def SUST_P_3D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
|
|
|
|
// Surface store "sust.p.3d.v2.b8.trap": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g}. Pattern list is empty here.
def SUST_P_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.3d.v2.b16.trap": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g}. Pattern list is empty here.
def SUST_P_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    !strconcat("sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.3d.v2.b32.trap": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g}. Pattern list is empty here.
def SUST_P_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    !strconcat("sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.3d.v4.b8.trap": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.3d.v4.b16.trap": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    !strconcat("sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
|
|
|
// Surface store "sust.p.3d.v4.b32.trap": address [$s, {$x, $y, $z, $z}]
// ($z repeated), data {$r, $g, $b, $a}. Pattern list is empty here.
def SUST_P_3D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    !strconcat("sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], ",
               "\\{$r, $g, $b, $a\\};"),
    []>;
|
2014-07-17 19:59:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Surface store instruction patterns
|
|
|
|
// These patterns are kept separate from the instruction definitions because
// attaching them there makes TableGen report type errors (cause unknown).
|
|
|
|
|
|
|
|
// .clamp variant
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_i8_clamp
//   -> SUST_B_1D_B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_i8_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B8_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_i16_clamp
//   -> SUST_B_1D_B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_i16_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B16_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_i32_clamp
//   -> SUST_B_1D_B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_i32_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_B32_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_i64_clamp
//   -> SUST_B_1D_B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_i64_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_B64_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v2i8_clamp
//   -> SUST_B_1D_V2B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v2i8_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v2i16_clamp
//   -> SUST_B_1D_V2B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v2i16_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v2i32_clamp
//   -> SUST_B_1D_V2B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v2i32_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v2i64_clamp
//   -> SUST_B_1D_V2B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v2i64_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v4i8_clamp
//   -> SUST_B_1D_V4B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v4i8_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v4i16_clamp
//   -> SUST_B_1D_V4B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v4i16_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v4i32_clamp
//   -> SUST_B_1D_V4B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v4i32_clamp
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_CLAMP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_i8_clamp
//   -> SUST_B_1D_ARRAY_B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_i16_clamp
//   -> SUST_B_1D_ARRAY_B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_i16_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_i32_clamp
//   -> SUST_B_1D_ARRAY_B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_i32_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_i64_clamp
//   -> SUST_B_1D_ARRAY_B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_i64_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_v2i8_clamp
//   -> SUST_B_1D_ARRAY_V2B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_v2i16_clamp
//   -> SUST_B_1D_ARRAY_V2B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_v2i32_clamp
//   -> SUST_B_1D_ARRAY_V2B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_v2i64_clamp
//   -> SUST_B_1D_ARRAY_V2B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_v4i8_clamp
//   -> SUST_B_1D_ARRAY_V4B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_v4i16_clamp
//   -> SUST_B_1D_ARRAY_V4B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_v4i32_clamp
//   -> SUST_B_1D_ARRAY_V4B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_i8_clamp
//   -> SUST_B_2D_B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B8_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_i16_clamp
//   -> SUST_B_2D_B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B16_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_i32_clamp
//   -> SUST_B_2D_B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_B32_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_i64_clamp
//   -> SUST_B_2D_B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_i64_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_B64_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_v2i8_clamp
//   -> SUST_B_2D_V2B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_v2i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_v2i16_clamp
//   -> SUST_B_2D_V2B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_v2i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_v2i32_clamp
//   -> SUST_B_2D_V2B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_v2i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_v2i64_clamp
//   -> SUST_B_2D_V2B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_v2i64_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_v4i8_clamp
//   -> SUST_B_2D_V4B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_v4i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_v4i16_clamp
//   -> SUST_B_2D_V4B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_v4i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_v4i32_clamp
//   -> SUST_B_2D_V4B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_v4i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_i8_clamp
//   -> SUST_B_2D_ARRAY_B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_i16_clamp
//   -> SUST_B_2D_ARRAY_B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_i16_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_i32_clamp
//   -> SUST_B_2D_ARRAY_B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_i32_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_i64_clamp
//   -> SUST_B_2D_ARRAY_B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_i64_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_v2i8_clamp
//   -> SUST_B_2D_ARRAY_V2B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_v2i16_clamp
//   -> SUST_B_2D_ARRAY_V2B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_v2i32_clamp
//   -> SUST_B_2D_ARRAY_V2B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_v2i64_clamp
//   -> SUST_B_2D_ARRAY_V2B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_v4i8_clamp
//   -> SUST_B_2D_ARRAY_V4B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_v4i16_clamp
//   -> SUST_B_2D_ARRAY_V4B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_2d_array_v4i32_clamp
//   -> SUST_B_2D_ARRAY_V4B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_CLAMP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_i8_clamp
//   -> SUST_B_3D_B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B8_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_i16_clamp
//   -> SUST_B_3D_B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B16_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;
|
2014-04-09 23:39:15 +08:00
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_i32_clamp
//   -> SUST_B_3D_B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r),
  (SUST_B_3D_B32_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_i64_clamp
//   -> SUST_B_3D_B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_i64_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r),
  (SUST_B_3D_B64_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_v2i8_clamp
//   -> SUST_B_3D_V2B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_v2i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_v2i16_clamp
//   -> SUST_B_3D_V2B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_v2i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_v2i32_clamp
//   -> SUST_B_3D_V2B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_v2i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_v2i64_clamp
//   -> SUST_B_3D_V2B64_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_v2i64_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_v4i8_clamp
//   -> SUST_B_3D_V4B8_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_v4i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_v4i16_clamp
//   -> SUST_B_3D_V4B16_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_v4i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_3d_v4i32_clamp
//   -> SUST_B_3D_V4B32_CLAMP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_3d_v4i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
2014-04-09 23:39:15 +08:00
|
|
|
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// .trap variant
|
2014-04-09 23:39:15 +08:00
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_i8_trap
//   -> SUST_B_1D_B8_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_i16_trap
//   -> SUST_B_1D_B16_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_i32_trap
//   -> SUST_B_1D_B32_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r)>;
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_i64_trap
//   -> SUST_B_1D_B64_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_i64_trap
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_B64_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r)>;
|
|
|
|
|
2014-04-09 23:39:15 +08:00
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v2i8_trap
//   -> SUST_B_1D_V2B8_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v2i16_trap
//   -> SUST_B_1D_V2B16_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v2i32_trap
//   -> SUST_B_1D_V2B32_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v2i64_trap
//   -> SUST_B_1D_V2B64_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v2i64_trap
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;
|
|
|
|
|
2014-04-09 23:39:15 +08:00
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v4i8_trap
//   -> SUST_B_1D_V4B8_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v4i16_trap
//   -> SUST_B_1D_V4B16_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_v4i32_trap
//   -> SUST_B_1D_V4B32_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_i8_trap
//   -> SUST_B_1D_ARRAY_B8_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
// Selection pattern: int_nvvm_sust_b_1d_array_i16_trap
//   -> SUST_B_1D_ARRAY_B16_TRAP, operands forwarded in order.
def : Pat<
  (int_nvvm_sust_b_1d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
|
|
|
|
(SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int32Regs:$r)>;
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
|
|
|
|
(SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int64Regs:$r)>;
|
|
|
|
|
2014-04-09 23:39:15 +08:00
|
|
|
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
|
|
|
|
(SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
|
|
|
|
(SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
|
|
|
|
(SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int32Regs:$r, Int32Regs:$g)>;
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
|
|
|
|
(SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int64Regs:$r, Int64Regs:$g)>;
|
|
|
|
|
2014-04-09 23:39:15 +08:00
|
|
|
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
|
|
|
|
(SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
|
|
|
|
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_b_2d_*_trap.
// Each intrinsic is mapped to the correspondingly named SUST_B_2D_*_TRAP
// instruction; operands are forwarded unchanged and in order: $s, $x, $y,
// then the data components ($r | $r,$g | $r,$g,$b,$a).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_b_2d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_b_2d_array_*_trap.
// Each intrinsic is mapped to the correspondingly named
// SUST_B_2D_ARRAY_*_TRAP instruction; operands are forwarded unchanged and in
// order: $s, $l, $x, $y, then the data components
// ($r | $r,$g | $r,$g,$b,$a).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_b_3d_*_trap.
// Each intrinsic is mapped to the correspondingly named SUST_B_3D_*_TRAP
// instruction; operands are forwarded unchanged and in order: $s, $x, $y, $z,
// then the data components ($r | $r,$g | $r,$g,$b,$a).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_b_3d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
2014-07-17 19:59:04 +08:00
|
|
|
// .zero variant
|
|
|
|
// Selection patterns for int_nvvm_sust_b_1d_*_zero.
// Each intrinsic is mapped to the correspondingly named SUST_B_1D_*_ZERO
// instruction; operands are forwarded unchanged and in order: $s, $x, then
// the data components ($r | $r,$g | $r,$g,$b,$a).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_b_1d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_b_1d_array_*_zero.
// Each intrinsic is mapped to the correspondingly named
// SUST_B_1D_ARRAY_*_ZERO instruction; operands are forwarded unchanged and in
// order: $s, $l, $x, then the data components ($r | $r,$g | $r,$g,$b,$a).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_b_2d_*_zero.
// Each intrinsic is mapped to the correspondingly named SUST_B_2D_*_ZERO
// instruction; operands are forwarded unchanged and in order: $s, $x, $y,
// then the data components ($r | $r,$g | $r,$g,$b,$a).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_b_2d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_b_2d_array_*_zero.
// Each intrinsic is mapped to the correspondingly named
// SUST_B_2D_ARRAY_*_ZERO instruction; operands are forwarded unchanged and in
// order: $s, $l, $x, $y, then the data components
// ($r | $r,$g | $r,$g,$b,$a).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_b_3d_*_zero.
// Each intrinsic is mapped to the correspondingly named SUST_B_3D_*_ZERO
// instruction; operands are forwarded unchanged and in order: $s, $x, $y, $z,
// then the data components ($r | $r,$g | $r,$g,$b,$a).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_b_3d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
2014-04-09 23:39:15 +08:00
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_p_1d_*_trap.
// Each intrinsic is mapped to the correspondingly named SUST_P_1D_*_TRAP
// instruction; operands are forwarded unchanged and in order: $s, $x, then
// the data components ($r | $r,$g | $r,$g,$b,$a).
// This group has 8-, 16- and 32-bit element forms only (no i64 patterns).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_p_1d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_p_1d_array_*_trap.
// Each intrinsic is mapped to the correspondingly named
// SUST_P_1D_ARRAY_*_TRAP instruction; operands are forwarded unchanged and in
// order: $s, $l, $x, then the data components ($r | $r,$g | $r,$g,$b,$a).
// This group has 8-, 16- and 32-bit element forms only (no i64 patterns).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $l is presumably the array layer index and $s the surface
// handle -- confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Selection patterns for int_nvvm_sust_p_2d_*_trap.
// Each intrinsic is mapped to the correspondingly named SUST_P_2D_*_TRAP
// instruction; operands are forwarded unchanged and in order: $s, $x, $y,
// then the data components ($r | $r,$g | $r,$g,$b,$a).
// This group has 8-, 16- and 32-bit element forms only (no i64 patterns).
// 8-bit data operands use Int16Regs, exactly like the 16-bit forms.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the intrinsic definitions.
def : Pat<(int_nvvm_sust_p_2d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
|
|
|
|
(SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
|
|
|
|
(SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
|
|
|
|
(SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int32Regs:$r)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r, Int16Regs:$g),
|
|
|
|
(SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
|
|
|
|
Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r, Int16Regs:$g),
|
|
|
|
(SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
|
|
|
|
Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
|
|
|
|
Int32Regs:$g),
|
|
|
|
(SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
|
|
|
|
(SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
|
|
|
|
Int32Regs:$x, Int32Regs:$y,
|
|
|
|
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_3d_i8_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r),
|
|
|
|
(SUST_P_3D_B8_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_3d_i16_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r),
|
|
|
|
(SUST_P_3D_B16_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_3d_i32_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int32Regs:$r),
|
|
|
|
(SUST_P_3D_B32_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int32Regs:$r)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r, Int16Regs:$g),
|
|
|
|
(SUST_P_3D_V2B8_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r, Int16Regs:$g),
|
|
|
|
(SUST_P_3D_V2B16_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r, Int16Regs:$g)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int32Regs:$r, Int32Regs:$g),
|
|
|
|
(SUST_P_3D_V2B32_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int32Regs:$r, Int32Regs:$g)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(SUST_P_3D_V4B8_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
|
|
|
|
(SUST_P_3D_V4B16_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
|
|
|
|
|
|
|
|
def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
|
|
|
|
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
|
|
|
|
(SUST_P_3D_V4B32_TRAP Int64Regs:$s,
|
|
|
|
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
|
|
|
|
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
|
|
|
|
|
2016-07-08 00:40:17 +08:00
|
|
|
//-----------------------------------
|
|
|
|
// Read Special Registers
|
|
|
|
//-----------------------------------
|
2014-04-09 23:39:15 +08:00
|
|
|
|
2016-07-08 00:40:17 +08:00
|
|
|
// Reads a special register into a 64-bit integer register.
//   regname - special-register name without the leading '%'; the asm string
//             below supplies the '%' itself.
//   intop   - operand-less intrinsic that this instruction is selected for.
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
|
2012-05-05 04:18:50 +08:00
|
|
|
: NVPTXInst<(outs Int64Regs:$d), (ins),
|
2017-01-18 08:09:19 +08:00
|
|
|
!strconcat("mov.u64 \t$d, %", regname, ";"),
|
2012-05-05 04:18:50 +08:00
|
|
|
[(set Int64Regs:$d, (intop))]>;
|
|
|
|
|
2016-07-08 00:40:17 +08:00
|
|
|
// Reads a special register into a 32-bit integer register.
//   regname - special-register name without the leading '%'; the asm string
//             below supplies the '%' itself.
//   intop   - operand-less intrinsic that this instruction is selected for.
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
|
2012-05-05 04:18:50 +08:00
|
|
|
: NVPTXInst<(outs Int32Regs:$d), (ins),
|
2017-01-18 08:09:19 +08:00
|
|
|
!strconcat("mov.u32 \t$d, %", regname, ";"),
|
2012-05-05 04:18:50 +08:00
|
|
|
[(set Int32Regs:$d, (intop))]>;
|
|
|
|
|
|
|
|
// TODO Add read vector-version of special registers
|
|
|
|
|
2016-07-08 00:40:17 +08:00
|
|
|
def INT_PTX_SREG_TID_X :
|
|
|
|
PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
|
|
|
|
def INT_PTX_SREG_TID_Y :
|
|
|
|
PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
|
|
|
|
def INT_PTX_SREG_TID_Z :
|
|
|
|
PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
|
|
|
|
def INT_PTX_SREG_TID_W :
|
|
|
|
PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
|
|
|
|
|
|
|
|
def INT_PTX_SREG_NTID_X :
|
|
|
|
PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
|
|
|
|
def INT_PTX_SREG_NTID_Y :
|
|
|
|
PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
|
|
|
|
def INT_PTX_SREG_NTID_Z :
|
|
|
|
PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
|
|
|
|
def INT_PTX_SREG_NTID_W :
|
|
|
|
PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
|
|
|
|
|
|
|
|
def INT_PTX_SREG_LANEID :
|
|
|
|
PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
|
|
|
|
def INT_PTX_SREG_WARPID :
|
|
|
|
PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
|
|
|
|
def INT_PTX_SREG_NWARPID :
|
|
|
|
PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
|
|
|
|
|
|
|
|
def INT_PTX_SREG_CTAID_X :
|
|
|
|
PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
|
|
|
|
def INT_PTX_SREG_CTAID_Y :
|
|
|
|
PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
|
|
|
|
def INT_PTX_SREG_CTAID_Z :
|
|
|
|
PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
|
|
|
|
def INT_PTX_SREG_CTAID_W :
|
|
|
|
PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
|
|
|
|
|
|
|
|
def INT_PTX_SREG_NCTAID_X :
|
|
|
|
PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
|
|
|
|
def INT_PTX_SREG_NCTAID_Y :
|
|
|
|
PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
|
|
|
|
def INT_PTX_SREG_NCTAID_Z :
|
|
|
|
PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
|
|
|
|
def INT_PTX_SREG_NCTAID_W :
|
|
|
|
PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
|
|
|
|
|
|
|
|
def INT_PTX_SREG_SMID :
|
|
|
|
PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
|
|
|
|
def INT_PTX_SREG_NSMID :
|
|
|
|
PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
|
|
|
|
def INT_PTX_SREG_GRIDID :
|
|
|
|
PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
|
|
|
|
|
|
|
|
def INT_PTX_SREG_LANEMASK_EQ :
|
|
|
|
PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
|
|
|
|
def INT_PTX_SREG_LANEMASK_LE :
|
|
|
|
PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
|
|
|
|
def INT_PTX_SREG_LANEMASK_LT :
|
|
|
|
PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
|
|
|
|
def INT_PTX_SREG_LANEMASK_GE :
|
|
|
|
PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
|
|
|
|
def INT_PTX_SREG_LANEMASK_GT :
|
|
|
|
PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
|
|
|
|
|
|
|
|
def INT_PTX_SREG_CLOCK :
|
|
|
|
PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
|
|
|
|
def INT_PTX_SREG_CLOCK64 :
|
|
|
|
PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
|
|
|
|
|
|
|
|
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
|
|
|
|
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
|
|
|
|
def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
|
|
|
|
def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
|
|
|
|
|
|
|
|
// TODO: It would be nice to use PTX_READ_SREG_R32 here, but it always emits
|
|
|
|
// a '%' before the register name, and WARP_SZ is a constant without one.
|
|
|
|
def INT_PTX_SREG_WARPSIZE :
|
|
|
|
NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
|
|
|
|
[(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
|
2017-10-13 02:27:55 +08:00
|
|
|
|
|
|
|
//
|
|
|
|
// wmma.load.[a|b|c].sync.[row|col].[m16n16k16|m32n8k16|m8n32k16][|.global|.shared].[f16|f32]
|
|
|
|
//
|
2018-03-16 05:40:56 +08:00
|
|
|
|
|
|
|
// Placeholder NVPTXInst base: no operands, a dummy "?" asm string and an empty
// pattern list. The WMMA instruction classes in this file derive from it and
// fill in Pattern, OutOperandList, InOperandList and AsmString with `let`.
class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
|
|
|
|
string Space, string Type, NVPTXRegClass regclass,
|
|
|
|
DAGOperand SrcOp, bit WithStride>
|
2018-04-19 05:51:48 +08:00
|
|
|
: EmptyNVPTXInst,
|
|
|
|
Requires<[!if(!eq(Geometry, "m16n16k16"),
|
|
|
|
hasPTX60,
|
|
|
|
hasPTX61),
|
|
|
|
hasSM70]> {
|
2018-03-21 01:18:59 +08:00
|
|
|
// Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic
|
|
|
|
// for this function.
|
2018-03-22 05:55:02 +08:00
|
|
|
PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_"
|
|
|
|
# Geometry # "_load_"
|
|
|
|
# !subst("c", "c_" # Type, Abc)
|
2018-03-21 01:18:59 +08:00
|
|
|
# "_" # Layout
|
|
|
|
# !subst(".", "_", Space)
|
|
|
|
# !if(WithStride,"_stride", "")
|
|
|
|
# "_Intr");
|
2018-03-16 05:40:56 +08:00
|
|
|
dag OutsR03 = (outs regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3);
|
|
|
|
dag OutsR47 = (outs regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7);
|
|
|
|
dag Outs = !if(!eq(Abc#Type,"cf16"), OutsR03, !con(OutsR03, OutsR47));
|
|
|
|
|
|
|
|
dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
|
|
|
|
dag Ins = !con((ins SrcOp:$src), StrideArg);
|
|
|
|
|
|
|
|
// Build a dag pattern that matches the intrinsic call.
|
|
|
|
// We want a dag that looks like this:
|
|
|
|
// (set <output args>, (intrinsic <input arguments>)) where input and
|
|
|
|
// output arguments are named patterns that would match corresponding
|
|
|
|
// input/output arguments of the instruction.
|
|
|
|
//
|
|
|
|
// First we construct (set <output arguments>) from instruction's outs dag by
|
|
|
|
// replacing dag operator 'outs' with 'set'.
|
|
|
|
dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
|
|
|
|
// Similarly, construct (intrinsic <input arguments>) sub-dag from
|
|
|
|
// instruction's input arguments, only now we also need to replace operands
|
|
|
|
// with patterns that would match them and the operator 'ins' with the
|
|
|
|
// intrinsic.
|
|
|
|
dag PatArgs = !foreach(tmp, Ins,
|
|
|
|
!subst(imem, ADDRvar,
|
|
|
|
!subst(MEMri64, ADDRri64,
|
|
|
|
!subst(MEMri, ADDRri,
|
2018-03-21 01:18:59 +08:00
|
|
|
!subst(ins, IntrMatcher, tmp)))));
|
2018-03-16 05:40:56 +08:00
|
|
|
// Finally, concatenate both parts together. !con() requires both dags to have
|
|
|
|
// the same operator, so we wrap PatArgs in a (set ...) dag.
|
|
|
|
let Pattern = [!con(PatOuts, (set PatArgs))];
|
|
|
|
let OutOperandList = Outs;
|
|
|
|
let InOperandList = Ins;
|
2018-03-22 05:55:02 +08:00
|
|
|
let AsmString = "wmma.load."
|
|
|
|
# Abc
|
2018-04-19 05:51:48 +08:00
|
|
|
# ".sync"
|
|
|
|
# "." # Layout
|
|
|
|
# "." # Geometry
|
|
|
|
# Space
|
2018-03-22 05:55:02 +08:00
|
|
|
# "." # Type # " \t"
|
|
|
|
# !if(!eq(Abc#Type, "cf16"),
|
|
|
|
"{{$r0, $r1, $r2, $r3}}",
|
|
|
|
"{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
|
|
|
|
# ", [$src]"
|
|
|
|
# !if(WithStride, ", $ldm", "")
|
|
|
|
# ";";
|
2018-03-16 05:40:56 +08:00
|
|
|
}
|
2017-10-13 02:27:55 +08:00
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
class WMMA_LOAD_INTR_HELPER<string Geometry, string Abc, string Layout,
|
|
|
|
string Space, string Type, bit WithStride>
|
2018-03-21 01:18:59 +08:00
|
|
|
: PatFrag <(ops),(ops)> {
|
|
|
|
// Intrinsic that matches this instruction.
|
2018-03-22 05:55:02 +08:00
|
|
|
Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma"
|
|
|
|
# "_" # Geometry # "_load_"
|
|
|
|
# Abc # "_" # Type # "_" # Layout
|
2018-03-21 01:18:59 +08:00
|
|
|
# !if(WithStride,"_stride", ""));
|
|
|
|
code match_generic = [{
|
|
|
|
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
|
|
|
|
}];
|
|
|
|
code match_shared = [{
|
|
|
|
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
|
|
|
|
}];
|
|
|
|
code match_global = [{
|
|
|
|
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
|
|
|
|
}];
|
|
|
|
|
|
|
|
let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
|
[TableGen] Support multi-alternative pattern fragments
A TableGen instruction record usually contains a DAG pattern that will
describe the SelectionDAG operation that can be implemented by this
instruction. However, there will be cases where several different DAG
patterns can all be implemented by the same instruction. The way to
represent this today is to write additional patterns in the Pattern
(or usually Pat) class that map those extra DAG patterns to the
instruction. This usually also works fine.
However, I've noticed cases where the current setup seems to require
quite a bit of extra (and duplicated) text in the target .td files.
For example, in the SystemZ back-end, there are quite a number of
instructions that can implement an "add-with-overflow" operation.
The same instructions also need to be used to implement just plain
addition (simply ignoring the extra overflow output). The current
solution requires creating extra Pat pattern for every instruction,
duplicating the information about which particular add operands
map best to which particular instruction.
This patch enhances TableGen to support a new PatFrags class, which
can be used to encapsulate multiple alternative patterns that may
all match to the same instruction. It operates the same way as the
existing PatFrag class, except that it accepts a list of DAG patterns
to match instead of just a single one. As an example, we can now define
a PatFrags to match either an "add-with-overflow" or a regular add
operation:
def z_sadd : PatFrags<(ops node:$src1, node:$src2),
[(z_saddo node:$src1, node:$src2),
(add node:$src1, node:$src2)]>;
and then use this in the add instruction pattern:
defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>;
These SystemZ target changes are implemented here as well.
Note that PatFrag is now defined as a subclass of PatFrags, which
means that some users of internals of PatFrag need to be updated.
(E.g. instead of using PatFrag.Fragment you now need to use
!head(PatFrag.Fragments).)
The implementation is based on the following main ideas:
- InlinePatternFragments may now replace each original pattern
with several result patterns, not just one.
- parseInstructionPattern delays calling InlinePatternFragments
and InferAllTypes. Instead, it extracts a single DAG match
pattern from the main instruction pattern.
- Processing of the DAG match pattern part of the main instruction
pattern now shares most code with processing match patterns from
the Pattern class.
- Direct use of main instruction patterns in InferFromPattern and
EmitResultInstructionAsOperand is removed; everything now operates
solely on DAG match patterns.
Reviewed by: hfinkel
Differential Revision: https://reviews.llvm.org/D48545
llvm-svn: 336999
2018-07-13 21:18:00 +08:00
|
|
|
let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
|
2018-03-21 01:18:59 +08:00
|
|
|
let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
|
|
|
|
!if(!eq(Space, ".global"), match_global, match_generic));
|
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Instantiates one WMMA_LOAD_GALSTOS instruction per kind of source-address
// operand. All other parameters are forwarded unchanged; only SrcOp varies:
//   _avar   -> imem
//   _areg   -> Int32Regs
//   _areg64 -> Int64Regs
//   _ari    -> MEMri
//   _ari64  -> MEMri64
multiclass WMMA_LOAD_GALSTS<string Geometry, string Abc, string Layout,
|
|
|
|
string Space, string Type, NVPTXRegClass regclass,
|
|
|
|
bit WithStride> {
|
|
|
|
def _avar: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
|
|
|
|
imem, WithStride>;
|
|
|
|
def _areg: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
|
|
|
|
Int32Regs, WithStride>;
|
|
|
|
def _areg64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
|
|
|
|
Int64Regs, WithStride>;
|
|
|
|
def _ari: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
|
|
|
|
MEMri, WithStride>;
|
|
|
|
def _ari64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
|
|
|
|
MEMri64, WithStride>;
|
2018-03-21 01:18:59 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Pairs the intrinsic-matching PatFrag with the load instructions that use it.
// The `_Intr` record defined here is the one WMMA_LOAD_GALSTOS looks up by
// name (its IntrMatcher field, built with !cast<PatFrag>), so the record name
// produced by this multiclass must stay in sync with that string.
multiclass WMMA_LOAD_GALSTSh<string Geometry, string Abc, string Layout,
|
|
|
|
string Space, string Type, NVPTXRegClass regclass,
|
|
|
|
bit WithStride> {
|
2018-03-21 01:18:59 +08:00
|
|
|
|
// Define a PatFrag that matches the appropriate intrinsic that loads from
|
|
|
|
// the given address space.
|
2018-03-22 05:55:02 +08:00
|
|
|
def _Intr: WMMA_LOAD_INTR_HELPER<Geometry, Abc, Layout, Space, Type,
|
|
|
|
WithStride>;
|
|
|
|
defm NAME: WMMA_LOAD_GALSTS<Geometry, Abc, Layout, Space, Type, regclass,
|
|
|
|
WithStride>;
|
2017-10-13 02:27:55 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Expands a load into its strided and non-strided forms: `_stride` passes
// WithStride = 1, the unsuffixed expansion passes WithStride = 0.
multiclass WMMA_LOAD_GALST<string Geometry, string Abc, string Layout,
|
|
|
|
string Space, string Type, NVPTXRegClass regclass> {
|
|
|
|
defm _stride: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 1>;
|
|
|
|
defm NAME: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 0>;
|
2017-10-13 02:27:55 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Expands a load over the three address-space variants. Space is passed as
// the literal PTX suffix: ".global", ".shared", or "" for the unsuffixed form
// (WMMA_LOAD_INTR_HELPER selects its generic-address-space predicate when
// Space is neither ".shared" nor ".global").
multiclass WMMA_LOAD_GALT<string Geometry, string Abc, string Layout,
|
|
|
|
string Type, NVPTXRegClass regclass> {
|
|
|
|
defm _global: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".global",
|
|
|
|
Type, regclass>;
|
|
|
|
defm _shared: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".shared",
|
|
|
|
Type, regclass>;
|
|
|
|
defm NAME: WMMA_LOAD_GALST<Geometry, Abc, Layout, "",
|
|
|
|
Type, regclass>;
|
2017-10-13 02:27:55 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Expands a load over both matrix layouts ("row" and "col").
multiclass WMMA_LOAD_GAT<string Geometry, string Abc,
|
|
|
|
string Type, NVPTXRegClass regclass> {
|
|
|
|
defm _row: WMMA_LOAD_GALT<Geometry, Abc, "row", Type, regclass>;
|
|
|
|
defm _col: WMMA_LOAD_GALT<Geometry, Abc, "col", Type, regclass>;
|
2017-10-13 02:27:55 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Top-level wmma.load expansion for one geometry string. Covers the "a" and
// "b" operands as f16 in Float16x2Regs, and the "c" operand both as f16
// (Float16x2Regs) and as f32 (Float32Regs).
multiclass WMMA_LOAD_G<string Geometry> {
|
|
|
|
defm _load_a: WMMA_LOAD_GAT<Geometry, "a", "f16", Float16x2Regs>;
|
|
|
|
defm _load_b: WMMA_LOAD_GAT<Geometry, "b", "f16", Float16x2Regs>;
|
|
|
|
defm _load_c_f16: WMMA_LOAD_GAT<Geometry, "c", "f16", Float16x2Regs>;
|
|
|
|
defm _load_c_f32: WMMA_LOAD_GAT<Geometry, "c", "f32", Float32Regs>;
|
|
|
|
}
|
|
|
|
|
2018-04-19 05:51:48 +08:00
|
|
|
defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">;
|
2018-03-22 05:55:02 +08:00
|
|
|
defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">;
|
2018-04-19 05:51:48 +08:00
|
|
|
defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">;
|
2017-10-13 02:27:55 +08:00
|
|
|
|
|
|
|
//
|
|
|
|
// wmma.store.d.sync.[row|col].[m16n16k16|m32n8k16|m8n32k16][|.global|.shared].[f16|f32]
|
|
|
|
//
|
2018-03-22 05:55:02 +08:00
|
|
|
class WMMA_STORE_D_GLSTSO<string Geometry, string Layout, string Space,
|
|
|
|
string Type, NVPTXRegClass regclass,
|
|
|
|
bit WithStride, DAGOperand DstOp>
|
2018-04-19 05:51:48 +08:00
|
|
|
: EmptyNVPTXInst,
|
|
|
|
Requires<[!if(!eq(Geometry, "m16n16k16"),
|
|
|
|
hasPTX60,
|
|
|
|
hasPTX61),
|
|
|
|
hasSM70]> {
|
2018-03-22 05:55:02 +08:00
|
|
|
PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA"
|
|
|
|
# "_" # Geometry # "_store_d"
|
2018-03-21 01:18:59 +08:00
|
|
|
# "_" # Type
|
|
|
|
# "_" # Layout
|
|
|
|
# !subst(".", "_", Space)
|
|
|
|
# !if(WithStride,"_stride", "")
|
|
|
|
# "_Intr");
|
2018-03-22 05:55:02 +08:00
|
|
|
dag InsR03 = (ins DstOp:$src, regclass:$r0, regclass:$r1,
|
|
|
|
regclass:$r2, regclass:$r3);
|
|
|
|
dag InsR47 = (ins regclass:$r4, regclass:$r5,
|
|
|
|
regclass:$r6, regclass:$r7);
|
2018-03-16 05:40:56 +08:00
|
|
|
dag InsR = !if(!eq(Type,"f16"), InsR03, !con(InsR03, InsR47));
|
|
|
|
dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
|
|
|
|
dag Ins = !con(InsR, StrideArg);
|
|
|
|
|
|
|
|
// Construct the pattern to match corresponding intrinsic call. See the
|
|
|
|
// details in the comments in WMMA_LOAD_GALSTOS.
|
|
|
|
dag PatArgs = !foreach(tmp, Ins,
|
|
|
|
!subst(imem, ADDRvar,
|
|
|
|
!subst(MEMri64, ADDRri64,
|
|
|
|
!subst(MEMri, ADDRri,
|
2018-03-21 01:18:59 +08:00
|
|
|
!subst(ins, IntrMatcher, tmp)))));
|
2018-03-16 05:40:56 +08:00
|
|
|
let Pattern = [PatArgs];
|
|
|
|
let OutOperandList = (outs);
|
|
|
|
let InOperandList = Ins;
|
|
|
|
let AsmString = "wmma.store.d.sync."
|
|
|
|
# Layout
|
2018-03-22 05:55:02 +08:00
|
|
|
# "." # Geometry
|
2018-03-16 05:40:56 +08:00
|
|
|
# Space
|
|
|
|
# "." # Type
|
|
|
|
# " \t[$src],"
|
|
|
|
# !if(!eq(Type,"f16"),
|
|
|
|
"{{$r0, $r1, $r2, $r3}}",
|
|
|
|
"{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
|
|
|
|
# !if(WithStride, ", $ldm", "")
|
|
|
|
# ";";
|
|
|
|
|
|
|
|
}
|
2017-10-13 02:27:55 +08:00
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
class WMMA_STORE_INTR_HELPER<string Geometry, string Layout, string Space,
|
2018-03-21 01:18:59 +08:00
|
|
|
string Type, bit WithStride>
|
|
|
|
: PatFrag <(ops),(ops)> {
|
|
|
|
// Intrinsic that matches this instruction.
|
2018-03-22 05:55:02 +08:00
|
|
|
Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
|
|
|
|
# Geometry
|
|
|
|
# "_store_d"
|
2018-03-21 01:18:59 +08:00
|
|
|
# "_" # Type
|
|
|
|
# "_" # Layout
|
|
|
|
# !if(WithStride, "_stride", ""));
|
|
|
|
code match_generic = [{
|
|
|
|
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
|
|
|
|
}];
|
|
|
|
code match_shared = [{
|
|
|
|
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
|
|
|
|
}];
|
|
|
|
code match_global = [{
|
|
|
|
return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
|
|
|
|
}];
|
|
|
|
|
|
|
|
dag Args = !if(!eq(Type,"f16"),
|
|
|
|
(ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3),
|
|
|
|
(ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3,
|
|
|
|
node:$r4, node:$r5, node:$r6, node:$r7));
|
|
|
|
dag StrideArg = !if(WithStride, (ops node:$ldm), (ops));
|
|
|
|
let Operands = !con(Args, StrideArg);
|
[TableGen] Support multi-alternative pattern fragments
A TableGen instruction record usually contains a DAG pattern that will
describe the SelectionDAG operation that can be implemented by this
instruction. However, there will be cases where several different DAG
patterns can all be implemented by the same instruction. The way to
represent this today is to write additional patterns in the Pattern
(or usually Pat) class that map those extra DAG patterns to the
instruction. This usually also works fine.
However, I've noticed cases where the current setup seems to require
quite a bit of extra (and duplicated) text in the target .td files.
For example, in the SystemZ back-end, there are quite a number of
instructions that can implement an "add-with-overflow" operation.
The same instructions also need to be used to implement just plain
addition (simply ignoring the extra overflow output). The current
solution requires creating extra Pat pattern for every instruction,
duplicating the information about which particular add operands
map best to which particular instruction.
This patch enhances TableGen to support a new PatFrags class, which
can be used to encapsulate multiple alternative patterns that may
all match to the same instruction. It operates the same way as the
existing PatFrag class, except that it accepts a list of DAG patterns
to match instead of just a single one. As an example, we can now define
a PatFrags to match either an "add-with-overflow" or a regular add
operation:
def z_sadd : PatFrags<(ops node:$src1, node:$src2),
[(z_saddo node:$src1, node:$src2),
(add node:$src1, node:$src2)]>;
and then use this in the add instruction pattern:
defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, z_sadd, GR32, GR32>;
These SystemZ target changes are implemented here as well.
Note that PatFrag is now defined as a subclass of PatFrags, which
means that some users of internals of PatFrag need to be updated.
(E.g. instead of using PatFrag.Fragment you now need to use
!head(PatFrag.Fragments).)
The implementation is based on the following main ideas:
- InlinePatternFragments may now replace each original pattern
with several result patterns, not just one.
- parseInstructionPattern delays calling InlinePatternFragments
and InferAllTypes. Instead, it extracts a single DAG match
pattern from the main instruction pattern.
- Processing of the DAG match pattern part of the main instruction
pattern now shares most code with processing match patterns from
the Pattern class.
- Direct use of main instruction patterns in InferFromPattern and
EmitResultInstructionAsOperand is removed; everything now operates
solely on DAG match patterns.
Reviewed by: hfinkel
Differential Revision: https://reviews.llvm.org/D48545
llvm-svn: 336999
2018-07-13 21:18:00 +08:00
|
|
|
let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
|
2018-03-21 01:18:59 +08:00
|
|
|
let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
|
|
|
|
!if(!eq(Space, ".global"), match_global, match_generic));
|
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Instantiates one WMMA_STORE_D_GLSTSO instruction per kind of
// destination-address operand. All other parameters are forwarded unchanged;
// only DstOp varies:
//   _avar   -> imem
//   _areg   -> Int32Regs
//   _areg64 -> Int64Regs
//   _ari    -> MEMri
//   _ari64  -> MEMri64
multiclass WMMA_STORE_D_GLSTS<string Geometry, string Layout, string Space,
|
|
|
|
string Type, NVPTXRegClass regclass,
|
|
|
|
bit WithStride> {
|
|
|
|
def _avar: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
|
|
|
|
WithStride, imem>;
|
|
|
|
def _areg: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
|
|
|
|
WithStride, Int32Regs>;
|
|
|
|
def _areg64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
|
|
|
|
WithStride, Int64Regs>;
|
|
|
|
def _ari: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
|
|
|
|
WithStride, MEMri>;
|
|
|
|
def _ari64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
|
|
|
|
WithStride, MEMri64>;
|
2018-03-21 01:18:59 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Pairs the intrinsic-matching PatFrag with the store instructions that use
// it. The `_Intr` record defined here is the one WMMA_STORE_D_GLSTSO looks up
// by name (its IntrMatcher field, built with !cast<PatFrag>), so the record
// name produced by this multiclass must stay in sync with that string.
multiclass WMMA_STORE_D_GLSTSh<string Geometry, string Layout, string Space,
|
|
|
|
string Type, NVPTXRegClass regclass,
|
|
|
|
bit WithStride> {
|
2018-03-21 01:18:59 +08:00
|
|
|
|
// Define a PatFrag that matches the appropriate intrinsic that stores to
|
|
|
|
// the given address space.
|
2018-03-22 05:55:02 +08:00
|
|
|
def _Intr: WMMA_STORE_INTR_HELPER<Geometry, Layout, Space, Type,
|
|
|
|
WithStride>;
|
|
|
|
defm NAME: WMMA_STORE_D_GLSTS<Geometry, Layout, Space, Type, regclass,
|
|
|
|
WithStride>;
|
2017-10-13 02:27:55 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Expands a store into its strided and non-strided forms: `_stride` passes
// WithStride = 1, the unsuffixed expansion passes WithStride = 0.
multiclass WMMA_STORE_D_GLST<string Geometry, string Layout, string Space,
|
2018-03-21 01:18:59 +08:00
|
|
|
string Type, NVPTXRegClass regclass > {
|
2018-03-22 05:55:02 +08:00
|
|
|
defm _stride: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 1>;
|
|
|
|
defm NAME: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 0>;
|
2017-10-13 02:27:55 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Expands a store over the three address-space variants. Space is passed as
// the literal PTX suffix: ".global", ".shared", or "" for the unsuffixed form
// (WMMA_STORE_INTR_HELPER selects its generic-address-space predicate when
// Space is neither ".shared" nor ".global").
multiclass WMMA_STORE_D_GLT<string Geometry, string Layout,
|
2017-10-13 02:27:55 +08:00
|
|
|
string Type, NVPTXRegClass regclass> {
|
2018-03-22 05:55:02 +08:00
|
|
|
defm _global: WMMA_STORE_D_GLST<Geometry, Layout, ".global", Type, regclass>;
|
|
|
|
defm _shared: WMMA_STORE_D_GLST<Geometry, Layout, ".shared", Type, regclass>;
|
|
|
|
defm NAME: WMMA_STORE_D_GLST<Geometry, Layout, "", Type, regclass>;
|
2017-10-13 02:27:55 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Expands a store over both matrix layouts ("row" and "col").
multiclass WMMA_STORE_D_GT<string Geometry, string Type,
|
|
|
|
NVPTXRegClass regclass> {
|
|
|
|
defm _row: WMMA_STORE_D_GLT<Geometry, "row", Type, regclass>;
|
|
|
|
defm _col: WMMA_STORE_D_GLT<Geometry, "col", Type, regclass>;
|
2017-10-13 02:27:55 +08:00
|
|
|
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Top-level wmma.store.d expansion for one geometry string. Covers the "d"
// result both as f16 (Float16x2Regs) and as f32 (Float32Regs).
multiclass WMMA_STORE_D_G<string Geometry> {
|
|
|
|
defm _store_d_f16: WMMA_STORE_D_GT<Geometry, "f16", Float16x2Regs>;
|
|
|
|
defm _store_d_f32: WMMA_STORE_D_GT<Geometry, "f32", Float32Regs>;
|
|
|
|
}
|
|
|
|
|
2018-04-19 05:51:48 +08:00
|
|
|
defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">;
|
2018-03-22 05:55:02 +08:00
|
|
|
defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">;
|
2018-04-19 05:51:48 +08:00
|
|
|
defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">;
|
2017-10-13 02:27:55 +08:00
|
|
|
|
|
|
|
// WMMA.MMA
|
2018-03-22 05:55:02 +08:00
|
|
|
// Instruction record for one wmma.mma.sync variant.
//
// Template arguments:
//   Geometry          geometry string, e.g. "m16n16k16".
//   ALayout, BLayout  layout strings for the A and B operands.
//   DType, d_reg      type string / register class of the $d* results.
//   CType, c_reg      type string / register class of the $c* inputs.
//   ab_reg            register class of the $a* and $b* inputs.
//   Satfinite         optional modifier appended verbatim to the asm name;
//                     any '.' in it becomes '_' in the intrinsic name.
//
// Predicates: hasPTX60 for geometry "m16n16k16", hasPTX61 for every other
// geometry; hasSM70 in all cases.
class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
                      string DType, NVPTXRegClass d_reg,
                      string CType, NVPTXRegClass c_reg,
                      NVPTXRegClass ab_reg,
                      string Satfinite = "">
    : EmptyNVPTXInst,
      Requires<[!if(!eq(Geometry, "m16n16k16"), hasPTX60, hasPTX61),
                hasSM70]> {
  // Look the intrinsic record up by its assembled name.
  Intrinsic Intr = !cast<Intrinsic>(
      !strconcat("int_nvvm_wmma_", Geometry, "_mma",
                 "_", ALayout,
                 "_", BLayout,
                 "_", DType,
                 "_", CType,
                 !subst(".", "_", Satfinite)));

  // Four result registers when DType is "f16", eight otherwise.
  dag Outs = !if(!eq(DType, "f16"),
                 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3),
                 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3,
                       d_reg:$d4, d_reg:$d5, d_reg:$d6, d_reg:$d7));

  // $c4..$c7 are only present when CType is not "f16".
  dag InsExtraCArgs = !if(!eq(CType, "f16"),
                          (ins),
                          (ins c_reg:$c4, c_reg:$c5, c_reg:$c6, c_reg:$c7));

  // Eight A registers, eight B registers, then $c0..$c3 plus the optional
  // extra C registers.
  dag Ins = !con((ins ab_reg:$a0, ab_reg:$a1, ab_reg:$a2, ab_reg:$a3,
                      ab_reg:$a4, ab_reg:$a5, ab_reg:$a6, ab_reg:$a7,
                      ab_reg:$b0, ab_reg:$b1, ab_reg:$b2, ab_reg:$b3,
                      ab_reg:$b4, ab_reg:$b5, ab_reg:$b6, ab_reg:$b7,
                      c_reg:$c0, c_reg:$c1, c_reg:$c2, c_reg:$c3),
                 InsExtraCArgs);

  // Derive the selection pattern from the operand lists: replace the `outs`
  // operator with `set` and the `ins` operator with the intrinsic, then join
  // the two dags. The comments in WMMA_LOAD_ALSTOS describe the technique in
  // detail.
  dag PatOuts = !foreach(node, Outs, !subst(outs, set, node));
  dag PatArgs = !foreach(node, Ins, !subst(ins, Intr, node));
  let Pattern = [!con(PatOuts, (set PatArgs))];

  let OutOperandList = Outs;
  let InOperandList = Ins;

  // Mnemonic followed by the brace-enclosed $d, $a, $b and $c register lists;
  // the $d and $c lists shrink to four entries for "f16".
  let AsmString = !strconcat(
      "wmma.mma.sync.", ALayout,
      ".", BLayout,
      ".", Geometry,
      ".", DType,
      ".", CType,
      Satfinite, "\n\t\t",
      !if(!eq(DType, "f16"),
          "{{$d0, $d1, $d2, $d3}}, \n\t\t",
          "{{$d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7}},\n\t\t"),
      "{{$a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7}},\n\t\t",
      "{{$b0, $b1, $b2, $b3, $b4, $b5, $b6, $b7}},\n\t\t",
      !if(!eq(CType, "f16"),
          "{{$c0, $c1, $c2, $c3}};",
          "{{$c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7}};"));
}
|
2017-10-13 02:27:55 +08:00
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Emits the two records for a fully specified
// (Geometry, ALayout, BLayout, DType, CType) combination:
//   "_satfinite" -> Satfinite = ".satfinite"
//   (no suffix)  -> Satfinite left at its default
// Both use Float16x2Regs as the A/B register class.
multiclass WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
                          string DType, NVPTXRegClass d_reg,
                          string CType, NVPTXRegClass c_reg> {
  def _satfinite : WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
                                   DType, d_reg,
                                   CType, c_reg,
                                   Float16x2Regs, ".satfinite">;
  def NAME       : WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
                                   DType, d_reg,
                                   CType, c_reg,
                                   Float16x2Regs>;
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Fans a (Geometry, ALayout, BLayout, DType) combination out over the C
// operand type:
//   "_f16" -> CType "f16", Float16x2Regs
//   "_f32" -> CType "f32", Float32Regs
multiclass WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
                         string DType, NVPTXRegClass d_reg> {
  defm _f16 : WMMA_MMA_GABDC<Geometry, ALayout, BLayout,
                             DType, d_reg,
                             "f16", Float16x2Regs>;
  defm _f32 : WMMA_MMA_GABDC<Geometry, ALayout, BLayout,
                             DType, d_reg,
                             "f32", Float32Regs>;
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Fans a (Geometry, ALayout, BLayout) combination out over the D (result)
// type:
//   "_f16" -> DType "f16", Float16x2Regs
//   "_f32" -> DType "f32", Float32Regs
multiclass WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
  defm _f16 : WMMA_MMA_GABD<Geometry, ALayout, BLayout,
                            "f16", Float16x2Regs>;
  defm _f32 : WMMA_MMA_GABD<Geometry, ALayout, BLayout,
                            "f32", Float32Regs>;
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Fans a (Geometry, ALayout) combination out over the B layout:
// "_col" passes BLayout = "col", "_row" passes BLayout = "row".
multiclass WMMA_MMA_GA<string Geometry, string ALayout> {
  defm _col : WMMA_MMA_GAB<Geometry, ALayout, "col">;
  defm _row : WMMA_MMA_GAB<Geometry, ALayout, "row">;
}
|
|
|
|
|
2018-03-22 05:55:02 +08:00
|
|
|
// Top of the wmma.mma hierarchy for one Geometry; fans out over the A layout:
// "_col" passes ALayout = "col", "_row" passes ALayout = "row".
// The nested multiclasses then append, in order, the B layout, the D type,
// the C type and the optional "_satfinite" suffix.
multiclass WMMA_MMA_G<string Geometry> {
  defm _col : WMMA_MMA_GA<Geometry, "col">;
  defm _row : WMMA_MMA_GA<Geometry, "row">;
}
|
2017-10-13 02:27:55 +08:00
|
|
|
|
2018-04-19 05:51:48 +08:00
|
|
|
// Instantiate the wmma.mma definitions for each geometry string.
defm INT_WMMA_MMA_m32n8k16  : WMMA_MMA_G<"m32n8k16">;
defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">;
defm INT_WMMA_MMA_m8n32k16  : WMMA_MMA_G<"m8n32k16">;
|