llvm-project/clang/test/CodeGenCUDA/link-device-bitcode.cu

// Test for linking with CUDA's libdevice as outlined in
// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
//
// REQUIRES: nvptx-registered-target
//
// Prepare bitcode file to link with
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
// RUN:    -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
// RUN:    -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
//
// Make sure function in device-code gets linked in and internalized.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-builtin-bitcode %t.bc  -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR

// Make sure legacy flag name works
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-cuda-bitcode %t.bc  -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR
//
// Make sure we can link two bitcode files.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \
// RUN:    -emit-llvm -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
//
// Make sure function in device-code gets linked but is not internalized
// when the bitcode is supplied via -mlink-bitcode-file.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-bitcode-file %t.bc -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR-NLD
//
// Make sure NVVMReflect pass is enabled in NVPTX back-end.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \
// RUN:    -mllvm -debug-pass=Structure 2>&1 \
// RUN:    | FileCheck %s -check-prefix CHECK-REFLECT

#include "Inputs/cuda.h"

__device__ float device_mul_or_add(float a, float b);
extern "C" __device__ double __nv_sin(double x);
extern "C" __device__ double __nv_exp(double x);

// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(
// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
// Empty device function defined directly in this file and called from
// kernel() below; the label checks above expect a plain (non-internal)
// definition of it in the emitted IR.
__device__ void should_not_be_internalized(float *data) {}

// Make sure kernel call has not been internalized.
// CHECK-IR-LABEL: define void @_Z6kernelPfS_
// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
__global__ __attribute__((used)) void kernel(float *out, float *in) {
  // Read both operands, then hand them to the helper that is only declared in
  // this file; its definition is expected to come from the linked bitcode.
  const float lhs = in[0];
  const float rhs = in[1];
  *out = device_mul_or_add(lhs, rhs);

  // Reference both extern "C" math routines so this translation unit needs
  // them. The sum is formed in double and narrowed to float on the store.
  const double sine = __nv_sin(*out);
  const double bump = __nv_exp(sine);
  *out = *out + bump;

  // Call the device function defined in this file.
  should_not_be_internalized(out);
}

// Make sure device_mul_or_add() is present in IR, is internal and
// calls __nvvm_reflect().
// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(
// CHECK-IR: call i32 @__nvvm_reflect
// CHECK-IR: ret float

// Make sure we've linked in and internalized only needed functions
// from the second bitcode file.
// CHECK-IR-2-LABEL: define internal double @__nv_sin
// CHECK-IR-2-LABEL: define internal double @__nv_exp
// CHECK-IR-2-NOT: double @__unused

// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1
[CUDA] Postprocess bitcode linked in during device-side CUDA compilation. Link in and internalize the symbols we need from supplied bitcode library. Differential Revision: http://reviews.llvm.org/D11664 llvm-svn: 247317 2015-09-11 02:24:23 +08:00			`// Test for linking with CUDA's libdevice as outlined in`
			`// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice`
			`//`
			`// REQUIRES: nvptx-registered-target`
			`//`
			`// Prepare bitcode file to link with`
[CUDA] Add -disable-llvm-passes to CodeGenCUDA/link-device-bitcode.cu. NFC We already have this flag in most of the file, but we need it everywhere else, to disable the NVVMReflect pass, which we're explicitly checking doesn't run here. (Upcoming changes to llvm will cause it to be run.) llvm-svn: 264969 2016-03-31 07:45:38 +08:00			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \`
			`// RUN: -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \`
			`// RUN: -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll`
[CUDA] Postprocess bitcode linked in during device-side CUDA compilation. Link in and internalize the symbols we need from supplied bitcode library. Differential Revision: http://reviews.llvm.org/D11664 llvm-svn: 247317 2015-09-11 02:24:23 +08:00			`//`
			`// Make sure function in device-code gets linked in and internalized.`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode The same semantics work for OpenCL, and probably any offload language. Keep the old name around as an alias. llvm-svn: 340193 2018-08-21 02:16:48 +08:00			`// RUN: -mlink-builtin-bitcode %t.bc -emit-llvm \`
			`// RUN: -disable-llvm-passes -o - %s \`
			`// RUN: \| FileCheck %s -check-prefix CHECK-IR`

			`// Make sure legacy flag name works`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
Allow linking multiple bitcode files. Linking options for particular file depend on the option that specifies the file. Currently there are two: * -mlink-bitcode-file links in complete content of the specified file. * -mlink-cuda-bitcode links in only the symbols needed by current TU. Linked symbols are internalized. This bitcode linking mode is used to link device-specific bitcode provided by CUDA. Files are linked in order they are specified on command line. -mlink-cuda-bitcode replaces -fcuda-uses-libdevice flag. Differential Revision: http://reviews.llvm.org/D13913 llvm-svn: 251427 2015-10-28 01:56:59 +08:00			`// RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \`
[CUDA] Postprocess bitcode linked in during device-side CUDA compilation. Link in and internalize the symbols we need from supplied bitcode library. Differential Revision: http://reviews.llvm.org/D11664 llvm-svn: 247317 2015-09-11 02:24:23 +08:00			`// RUN: -disable-llvm-passes -o - %s \`
			`// RUN: \| FileCheck %s -check-prefix CHECK-IR`
			`//`
Allow linking multiple bitcode files. Linking options for particular file depend on the option that specifies the file. Currently there are two: * -mlink-bitcode-file links in complete content of the specified file. * -mlink-cuda-bitcode links in only the symbols needed by current TU. Linked symbols are internalized. This bitcode linking mode is used to link device-specific bitcode provided by CUDA. Files are linked in order they are specified on command line. -mlink-cuda-bitcode replaces -fcuda-uses-libdevice flag. Differential Revision: http://reviews.llvm.org/D13913 llvm-svn: 251427 2015-10-28 01:56:59 +08:00			`// Make sure we can link two bitcode files.`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode The same semantics work for OpenCL, and probably any offload language. Keep the old name around as an alias. llvm-svn: 340193 2018-08-21 02:16:48 +08:00			`// RUN: -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \`
Allow linking multiple bitcode files. Linking options for particular file depend on the option that specifies the file. Currently there are two: * -mlink-bitcode-file links in complete content of the specified file. * -mlink-cuda-bitcode links in only the symbols needed by current TU. Linked symbols are internalized. This bitcode linking mode is used to link device-specific bitcode provided by CUDA. Files are linked in order they are specified on command line. -mlink-cuda-bitcode replaces -fcuda-uses-libdevice flag. Differential Revision: http://reviews.llvm.org/D13913 llvm-svn: 251427 2015-10-28 01:56:59 +08:00			`// RUN: -emit-llvm -disable-llvm-passes -o - %s \`
			`// RUN: \| FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2`
			`//`
[CUDA] Postprocess bitcode linked in during device-side CUDA compilation. Link in and internalize the symbols we need from supplied bitcode library. Differential Revision: http://reviews.llvm.org/D11664 llvm-svn: 247317 2015-09-11 02:24:23 +08:00			`// Make sure function in device-code gets linked but is not internalized`
			`// when the bitcode is supplied via -mlink-bitcode-file.`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
			`// RUN: -mlink-bitcode-file %t.bc -emit-llvm \`
			`// RUN: -disable-llvm-passes -o - %s \`
			`// RUN: \| FileCheck %s -check-prefix CHECK-IR-NLD`
			`//`
			`// Make sure NVVMReflect pass is enabled in NVPTX back-end.`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode The same semantics work for OpenCL, and probably any offload language. Keep the old name around as an alias. llvm-svn: 340193 2018-08-21 02:16:48 +08:00			`// RUN: -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \`
Remove -cc1 option "-backend-option". It means the same thing as -mllvm; there isn't any reason to have two options which do the same thing. Differential Revision: https://reviews.llvm.org/D45109 llvm-svn: 329965 2018-04-13 06:21:36 +08:00			`// RUN: -mllvm -debug-pass=Structure 2>&1 \`
[CUDA] Postprocess bitcode linked in during device-side CUDA compilation. Link in and internalize the symbols we need from supplied bitcode library. Differential Revision: http://reviews.llvm.org/D11664 llvm-svn: 247317 2015-09-11 02:24:23 +08:00			`// RUN: \| FileCheck %s -check-prefix CHECK-REFLECT`

			`#include "Inputs/cuda.h"`

			`__device__ float device_mul_or_add(float a, float b);`
			`extern "C" __device__ double __nv_sin(double x);`
			`extern "C" __device__ double __nv_exp(double x);`

			`// CHECK-IR-LABEL: define void @_Z26should_not_be_internalizedPf(`
			`// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(`
			`__device__ void should_not_be_internalized(float *data) {}`

			`// Make sure kernel call has not been internalized.`
			`// CHECK-IR-LABEL: define void @_Z6kernelPfS_`
			`// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(`
			`__global__ __attribute__((used)) void kernel(float *out, float *in) {`
			`*out = device_mul_or_add(in[0], in[1]);`
			`*out += __nv_exp(__nv_sin(*out));`
			`should_not_be_internalized(out);`
			`}`

			`// Make sure device_mul_or_add() is present in IR, is internal and`
			`// calls __nvvm_reflect().`
			`// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(`
			`// CHECK-IR-NLD-LABEL: define float @_Z17device_mul_or_addff(`
			`// CHECK-IR: call i32 @__nvvm_reflect`
			`// CHECK-IR: ret float`

Allow linking multiple bitcode files. Linking options for particular file depend on the option that specifies the file. Currently there are two: * -mlink-bitcode-file links in complete content of the specified file. * -mlink-cuda-bitcode links in only the symbols needed by current TU. Linked symbols are internalized. This bitcode linking mode is used to link device-specific bitcode provided by CUDA. Files are linked in order they are specified on command line. -mlink-cuda-bitcode replaces -fcuda-uses-libdevice flag. Differential Revision: http://reviews.llvm.org/D13913 llvm-svn: 251427 2015-10-28 01:56:59 +08:00			`// Make sure we've linked in and internalized only needed functions`
			`// from the second bitcode file.`
			`// CHECK-IR-2-LABEL: define internal double @__nv_sin`
			`// CHECK-IR-2-LABEL: define internal double @__nv_exp`
			`// CHECK-IR-2-NOT: double @__unused`

[CUDA] Postprocess bitcode linked in during device-side CUDA compilation. Link in and internalize the symbols we need from supplied bitcode library. Differential Revision: http://reviews.llvm.org/D11664 llvm-svn: 247317 2015-09-11 02:24:23 +08:00			`// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.`
			`// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1`