From a659d2590e10703d2382410dfcc9ec02c5c6a673 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 6 Dec 2017 17:50:05 +0000 Subject: [PATCH] [NVPTX,CUDA] Added llvm.nvvm.fns intrinsic and matching __nvvm_fns builtin in clang. Differential Revision: https://reviews.llvm.org/D40872 llvm-svn: 319909 --- clang/include/clang/Basic/BuiltinsNVPTX.def | 3 ++ clang/lib/Headers/__clang_cuda_intrinsics.h | 4 +++ llvm/include/llvm/IR/IntrinsicsNVVM.td | 5 +++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 27 ++++++++++++++++ llvm/test/CodeGen/NVPTX/fns.ll | 36 +++++++++++++++++++++ 5 files changed, 75 insertions(+) create mode 100644 llvm/test/CodeGen/NVPTX/fns.ll diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index b596793c9c12..7bab73a3b110 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -371,6 +371,9 @@ BUILTIN(__nvvm_bitcast_i2f, "fi", "") BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "") BUILTIN(__nvvm_bitcast_d2ll, "LLid", "") +// FNS +TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", "ptx60") + // Sync BUILTIN(__syncthreads, "v", "") diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h index 3f14b4f2dded..02d68a2e618e 100644 --- a/clang/lib/Headers/__clang_cuda_intrinsics.h +++ b/clang/lib/Headers/__clang_cuda_intrinsics.h @@ -206,6 +206,10 @@ inline __device__ unsigned int __ballot_sync(unsigned int mask, int pred) { inline __device__ unsigned int __activemask() { return __nvvm_vote_ballot(1); } +inline __device__ unsigned int __fns(unsigned mask, unsigned base, int offset) { + return __nvvm_fns(mask, base, offset); +} + #endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 // Define __match* builtins CUDA-9 headers expect to see. diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 6f75e78ff615..73622ce9303f 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -682,6 +682,11 @@ let TargetPrefix = "nvvm" in { def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">, Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; +// FNS + + def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // Atomics not available as llvm intrinsics. def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty], diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 478f3e9d0577..c932758bd0ae 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -979,6 +979,33 @@ def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>; +// +// FNS +// + +class INT_FNS_MBO + : NVPTXInst<(outs Int32Regs:$dst), ins, + "fns.b32 \t$dst, $mask, $base, $offset;", + [(set Int32Regs:$dst, Operands )]>, + Requires<[hasPTX60, hasSM30]>; + +def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset), + (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>; +def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset), + (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>; +def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset), + (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>; +def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset), + (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>; +def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset), + (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>; +def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset), + (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>; +def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset), + (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>; +def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset), + (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>; + //----------------------------------- // Atomic Functions //----------------------------------- diff --git a/llvm/test/CodeGen/NVPTX/fns.ll b/llvm/test/CodeGen/NVPTX/fns.ll new file mode 100644 index 000000000000..7673e43449c3 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/fns.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s + +declare i32 @llvm.nvvm.fns(i32, i32, i32) + +; CHECK-LABEL: .func{{.*}}fns +define i32 @fns(i32 %mask, i32 %base, i32 %offset) { + ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [fns_param_0]; + ; CHECK: ld.param.u32 [[BASE:%r[0-9]+]], [fns_param_1]; + ; CHECK: ld.param.u32 [[OFFSET:%r[0-9]+]], [fns_param_2]; + + ; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], [[BASE]], [[OFFSET]]; + %r0 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 %offset); + ; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], [[BASE]], 0; + %r1 = call i32 @llvm.nvvm.fns(i32 %mask, i32 %base, i32 0); + %r01 = add i32 %r0, %r1; + ; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], 1, [[OFFSET]]; + %r2 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 %offset); + ; CHECK: fns.b32 {{%r[0-9]+}}, [[MASK]], 1, 0; + %r3 = call i32 @llvm.nvvm.fns(i32 %mask, i32 1, i32 0); + %r23 = add i32 %r2, %r3; + %r0123 = add i32 %r01, %r23; + ; CHECK: fns.b32 {{%r[0-9]+}}, 2, [[BASE]], [[OFFSET]]; + %r4 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 %offset); + ; CHECK: fns.b32 {{%r[0-9]+}}, 2, [[BASE]], 0; + %r5 = call i32 @llvm.nvvm.fns(i32 2, i32 %base, i32 0); + %r45 = add i32 %r4, %r5; + ; CHECK: fns.b32 {{%r[0-9]+}}, 2, 1, [[OFFSET]]; + %r6 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 %offset); + ; CHECK: fns.b32 {{%r[0-9]+}}, 2, 1, 0; + %r7 = call i32 @llvm.nvvm.fns(i32 2, i32 1, i32 0); + %r67 = add i32 %r6, %r7; + %r4567 = add i32 %r45, %r67; + %r = add i32 %r0123, %r4567; + ret i32 %r; +} +