llvm-project/clang/test/CodeGenCUDA/propagate-metadata.cu

// Check that when we link a bitcode module into a file using
// -mlink-builtin-bitcode, we apply the same attributes to the functions in that
// bitcode module as we apply to functions we generate.
//
// In particular, we check that ftz and unsafe-math are propagated into the
// bitcode library as appropriate.
//
// In addition, we set -ftrapping-math on the bitcode library, but then set
// -fno-trapping-math on the main compilations, and ensure that the latter flag
// overrides the flag on the bitcode library.

// Build the bitcode library.  This is not built in CUDA mode, otherwise it
// might have incompatible attributes.  This mirrors how libdevice is built.
// RUN: %clang_cc1 -x c++ -emit-llvm-bc -ftrapping-math -DLIB \
// RUN:   %s -o %t.bc -triple nvptx-unknown-unknown

// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \
// RUN:   -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \
// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST

// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \
// RUN:   -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \
// RUN:   -fcuda-is-device -triple nvptx-unknown-unknown \
// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ \
// RUN:   --check-prefix=NOFAST

// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \
// RUN:   -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \
// RUN:   -fcuda-is-device -menable-unsafe-fp-math -triple nvptx-unknown-unknown \
// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FAST

// Wrap everything in extern "C" so we don't have to worry about name mangling
// in the IR.
extern "C" {
#ifdef LIB

// This function is defined in the library and only declared in the main
// compilation.
void lib_fn() {}

#else

#include "Inputs/cuda.h"
__device__ void lib_fn();
__global__ void kernel() { lib_fn(); }

#endif
}

// The kernel and lib function should have the same attributes.
// CHECK: define void @kernel() [[attr:#[0-9]+]]
// CHECK: define internal void @lib_fn() [[attr]]

// Check the attribute list.
// CHECK: attributes [[attr]] = {
// CHECK: "no-trapping-math"="true"

// FTZ-SAME: "nvptx-f32ftz"="true"
// NOFTZ-NOT: "nvptx-f32ftz"="true"

// FAST-SAME: "unsafe-fp-math"="true"
// NOFAST-NOT: "unsafe-fp-math"="true"
[CodeGen] [CUDA] Add the ability set default attrs on functions in linked modules. Summary: Now when you ask clang to link in a bitcode module, you can tell it to set attributes on that module's functions to match what we would have set if we'd emitted those functions ourselves. This is particularly important for fast-math attributes in CUDA compilations. Each CUDA compilation links in libdevice, a bitcode library provided by nvidia as part of the CUDA distribution. Without this patch, if we have a user-function F that is compiled with -ffast-math that calls a function G from libdevice, F will have the unsafe-fp-math=true (etc.) attributes, but G will have no attributes. Since F calls G, the inliner will merge G's attributes into F's. It considers the lack of an unsafe-fp-math=true attribute on G to be tantamount to unsafe-fp-math=false, so it "merges" these by setting unsafe-fp-math=false on F. This then continues up the call graph, until every function that (transitively) calls something in libdevice gets unsafe-fp-math=false set, thus disabling fastmath in almost all CUDA code. Reviewers: echristo Subscribers: hfinkel, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D28538 llvm-svn: 293097 2017-01-26 05:29:48 +08:00			`// Check that when we link a bitcode module into a file using`
Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode The same semantics work for OpenCL, and probably any offload language. Keep the old name around as an alias. llvm-svn: 340193 2018-08-21 02:16:48 +08:00			`// -mlink-builtin-bitcode, we apply the same attributes to the functions in that`
[CodeGen] [CUDA] Add the ability set default attrs on functions in linked modules. Summary: Now when you ask clang to link in a bitcode module, you can tell it to set attributes on that module's functions to match what we would have set if we'd emitted those functions ourselves. This is particularly important for fast-math attributes in CUDA compilations. Each CUDA compilation links in libdevice, a bitcode library provided by nvidia as part of the CUDA distribution. Without this patch, if we have a user-function F that is compiled with -ffast-math that calls a function G from libdevice, F will have the unsafe-fp-math=true (etc.) attributes, but G will have no attributes. Since F calls G, the inliner will merge G's attributes into F's. It considers the lack of an unsafe-fp-math=true attribute on G to be tantamount to unsafe-fp-math=false, so it "merges" these by setting unsafe-fp-math=false on F. This then continues up the call graph, until every function that (transitively) calls something in libdevice gets unsafe-fp-math=false set, thus disabling fastmath in almost all CUDA code. Reviewers: echristo Subscribers: hfinkel, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D28538 llvm-svn: 293097 2017-01-26 05:29:48 +08:00			`// bitcode module as we apply to functions we generate.`
			`//`
			`// In particular, we check that ftz and unsafe-math are propagated into the`
			`// bitcode library as appropriate.`
			`//`
			`// In addition, we set -ftrapping-math on the bitcode library, but then set`
			`// -fno-trapping-math on the main compilations, and ensure that the latter flag`
			`// overrides the flag on the bitcode library.`

			`// Build the bitcode library. This is not built in CUDA mode, otherwise it`
			`// might have incompatible attributes. This mirrors how libdevice is built.`
			`// RUN: %clang_cc1 -x c++ -emit-llvm-bc -ftrapping-math -DLIB \`
			`// RUN: %s -o %t.bc -triple nvptx-unknown-unknown`

Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode The same semantics work for OpenCL, and probably any offload language. Keep the old name around as an alias. llvm-svn: 340193 2018-08-21 02:16:48 +08:00			`// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \`
[CodeGen] [CUDA] Add the ability set default attrs on functions in linked modules. Summary: Now when you ask clang to link in a bitcode module, you can tell it to set attributes on that module's functions to match what we would have set if we'd emitted those functions ourselves. This is particularly important for fast-math attributes in CUDA compilations. Each CUDA compilation links in libdevice, a bitcode library provided by nvidia as part of the CUDA distribution. Without this patch, if we have a user-function F that is compiled with -ffast-math that calls a function G from libdevice, F will have the unsafe-fp-math=true (etc.) attributes, but G will have no attributes. Since F calls G, the inliner will merge G's attributes into F's. It considers the lack of an unsafe-fp-math=true attribute on G to be tantamount to unsafe-fp-math=false, so it "merges" these by setting unsafe-fp-math=false on F. This then continues up the call graph, until every function that (transitively) calls something in libdevice gets unsafe-fp-math=false set, thus disabling fastmath in almost all CUDA code. Reviewers: echristo Subscribers: hfinkel, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D28538 llvm-svn: 293097 2017-01-26 05:29:48 +08:00			`// RUN: -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \`
			`// RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST`

Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode The same semantics work for OpenCL, and probably any offload language. Keep the old name around as an alias. llvm-svn: 340193 2018-08-21 02:16:48 +08:00			`// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \`
[CodeGen] [CUDA] Add the ability set default attrs on functions in linked modules. Summary: Now when you ask clang to link in a bitcode module, you can tell it to set attributes on that module's functions to match what we would have set if we'd emitted those functions ourselves. This is particularly important for fast-math attributes in CUDA compilations. Each CUDA compilation links in libdevice, a bitcode library provided by nvidia as part of the CUDA distribution. Without this patch, if we have a user-function F that is compiled with -ffast-math that calls a function G from libdevice, F will have the unsafe-fp-math=true (etc.) attributes, but G will have no attributes. Since F calls G, the inliner will merge G's attributes into F's. It considers the lack of an unsafe-fp-math=true attribute on G to be tantamount to unsafe-fp-math=false, so it "merges" these by setting unsafe-fp-math=false on F. This then continues up the call graph, until every function that (transitively) calls something in libdevice gets unsafe-fp-math=false set, thus disabling fastmath in almost all CUDA code. Reviewers: echristo Subscribers: hfinkel, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D28538 llvm-svn: 293097 2017-01-26 05:29:48 +08:00			`// RUN: -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \`
			`// RUN: -fcuda-is-device -triple nvptx-unknown-unknown \`
			`// RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=FTZ \`
			`// RUN: --check-prefix=NOFAST`

Rename -mlink-cuda-bitcode to -mlink-builtin-bitcode The same semantics work for OpenCL, and probably any offload language. Keep the old name around as an alias. llvm-svn: 340193 2018-08-21 02:16:48 +08:00			`// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \`
[CodeGen] [CUDA] Add the ability set default attrs on functions in linked modules. Summary: Now when you ask clang to link in a bitcode module, you can tell it to set attributes on that module's functions to match what we would have set if we'd emitted those functions ourselves. This is particularly important for fast-math attributes in CUDA compilations. Each CUDA compilation links in libdevice, a bitcode library provided by nvidia as part of the CUDA distribution. Without this patch, if we have a user-function F that is compiled with -ffast-math that calls a function G from libdevice, F will have the unsafe-fp-math=true (etc.) attributes, but G will have no attributes. Since F calls G, the inliner will merge G's attributes into F's. It considers the lack of an unsafe-fp-math=true attribute on G to be tantamount to unsafe-fp-math=false, so it "merges" these by setting unsafe-fp-math=false on F. This then continues up the call graph, until every function that (transitively) calls something in libdevice gets unsafe-fp-math=false set, thus disabling fastmath in almost all CUDA code. Reviewers: echristo Subscribers: hfinkel, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D28538 llvm-svn: 293097 2017-01-26 05:29:48 +08:00			`// RUN: -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \`
			`// RUN: -fcuda-is-device -menable-unsafe-fp-math -triple nvptx-unknown-unknown \`
			`// RUN: \| FileCheck %s --check-prefix=CHECK --check-prefix=FAST`

Fix typos in clang Found via codespell -q 3 -I ../clang-whitelist.txt Where whitelist consists of: archtype cas classs checkk compres definit frome iff inteval ith lod methode nd optin ot pres statics te thru Patch by luzpaz! (This is a subset of D44188 that applies cleanly with a few files that have dubious fixes reverted.) Differential revision: https://reviews.llvm.org/D44188 llvm-svn: 329399 2018-04-06 23:14:32 +08:00			`// Wrap everything in extern "C" so we don't have to worry about name mangling`
[CodeGen] [CUDA] Add the ability set default attrs on functions in linked modules. Summary: Now when you ask clang to link in a bitcode module, you can tell it to set attributes on that module's functions to match what we would have set if we'd emitted those functions ourselves. This is particularly important for fast-math attributes in CUDA compilations. Each CUDA compilation links in libdevice, a bitcode library provided by nvidia as part of the CUDA distribution. Without this patch, if we have a user-function F that is compiled with -ffast-math that calls a function G from libdevice, F will have the unsafe-fp-math=true (etc.) attributes, but G will have no attributes. Since F calls G, the inliner will merge G's attributes into F's. It considers the lack of an unsafe-fp-math=true attribute on G to be tantamount to unsafe-fp-math=false, so it "merges" these by setting unsafe-fp-math=false on F. This then continues up the call graph, until every function that (transitively) calls something in libdevice gets unsafe-fp-math=false set, thus disabling fastmath in almost all CUDA code. Reviewers: echristo Subscribers: hfinkel, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D28538 llvm-svn: 293097 2017-01-26 05:29:48 +08:00			`// in the IR.`
			`extern "C" {`
			`#ifdef LIB`

			`// This function is defined in the library and only declared in the main`
			`// compilation.`
			`void lib_fn() {}`

			`#else`

			`#include "Inputs/cuda.h"`
			`__device__ void lib_fn();`
			`__global__ void kernel() { lib_fn(); }`

			`#endif`
			`}`

			`// The kernel and lib function should have the same attributes.`
			`// CHECK: define void @kernel() [[attr:#[0-9]+]]`
			`// CHECK: define internal void @lib_fn() [[attr]]`

			`// Check the attribute list.`
			`// CHECK: attributes [[attr]] = {`
			`// CHECK: "no-trapping-math"="true"`

			`// FTZ-SAME: "nvptx-f32ftz"="true"`
			`// NOFTZ-NOT: "nvptx-f32ftz"="true"`

			`// FAST-SAME: "unsafe-fp-math"="true"`
			`// NOFAST-NOT: "unsafe-fp-math"="true"`