forked from OSchip/llvm-project
[NVPTX,CUDA] Added llvm.nvvm.fns intrinsic and matching __nvvm_fns builtin in clang.
Differential Revision: https://reviews.llvm.org/D40872 llvm-svn: 319909
This commit is contained in:
parent
4631ef1e43
commit
a659d2590e
|
@ -371,6 +371,9 @@ BUILTIN(__nvvm_bitcast_i2f, "fi", "")
|
|||
BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
|
||||
BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")
|
||||
|
||||
// FNS
|
||||
TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", "ptx60")
|
||||
|
||||
// Sync
|
||||
|
||||
BUILTIN(__syncthreads, "v", "")
|
||||
|
|
|
@ -206,6 +206,10 @@ inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) {
|
|||
|
||||
inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); }
|
||||
|
||||
inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) {
|
||||
return __nvvm_fns(mask, base, offset);
|
||||
}
|
||||
|
||||
#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
|
||||
|
||||
// Define __match* builtins CUDA-9 headers expect to see.
|
||||
|
|
|
@ -682,6 +682,11 @@ let TargetPrefix = "nvvm" in {
|
|||
def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
|
||||
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
|
||||
|
||||
// FNS
|
||||
|
||||
def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// Atomics not available as llvm intrinsics.
|
||||
def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
|
||||
|
|
|
@ -979,6 +979,33 @@ def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
|
|||
def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
|
||||
Float64Regs, int_nvvm_bitcast_d2ll>;
|
||||
|
||||
//
|
||||
// FNS
|
||||
//
|
||||
|
||||
class INT_FNS_MBO<dag ins, dag Operands>
|
||||
: NVPTXInst<(outs Int32Regs:$dst), ins,
|
||||
"fns.b32 \t$dst, $mask, $base, $offset;",
|
||||
[(set Int32Regs:$dst, Operands )]>,
|
||||
Requires<[hasPTX60, hasSM30]>;
|
||||
|
||||
def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
|
||||
(int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
|
||||
def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
|
||||
(int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
|
||||
def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
|
||||
(int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
|
||||
def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
|
||||
(int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
|
||||
def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
|
||||
(int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
|
||||
def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
|
||||
(int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
|
||||
def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
|
||||
(int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
|
||||
def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
|
||||
(int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
|
||||
|
||||
//-----------------------------------
|
||||
// Atomic Functions
|
||||
//-----------------------------------
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s
|
||||
|
||||
declare i32 @llvm.nvvm.fns(i32, i32, i32)
|
||||
|
||||
; CHECK-LABEL: .func{{.*}}fns
|
||||
define i32 @fns(i32 %mask, i32 %base, i32 %offset) {
|
||||
; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [fns_param_0];
|
||||
; CHECK: ld.param.u32 [[BASE:%r[0-9]+]], [fns_param_1];
|
||||
; CHECK: ld.param.u32 [[OFFSET:%r[0-9]+]], [fns_param_2];
|
||||
|
||||
; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], [[BASE]], [[OFFSET]];
|
||||
%r0 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 %offset);
|
||||
; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], [[BASE]], 0;
|
||||
%r1 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 0);
|
||||
%r01 = add i32 %r0, %r1;
|
||||
; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], 1, [[OFFSET]];
|
||||
%r2 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 %offset);
|
||||
; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], 1, 0;
|
||||
%r3 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 0);
|
||||
%r23 = add i32 %r2, %r3;
|
||||
%r0123 = add i32 %r01, %r23;
|
||||
; CHECK: fns.b32 {{%r[0-9]+}}, 2, [[BASE]], [[OFFSET]];
|
||||
%r4 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 %offset);
|
||||
; CHECK: fns.b32 {{%r[0-9]+}}, 2, [[BASE]], 0;
|
||||
%r5 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 0);
|
||||
%r45 = add i32 %r4, %r5;
|
||||
; CHECK: fns.b32 {{%r[0-9]+}}, 2, 1, [[OFFSET]];
|
||||
%r6 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 %offset);
|
||||
; CHECK: fns.b32 {{%r[0-9]+}}, 2, 1, 0;
|
||||
%r7 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 0);
|
||||
%r67 = add i32 %r6, %r7;
|
||||
%r4567 = add i32 %r45, %r67;
|
||||
%r = add i32 %r0123, %r4567;
|
||||
ret i32 %r;
|
||||
}
|
||||
|
Loading…
Reference in New Issue