[CUDA] Add appropriate host/device attribute to builtins.

The changes are part of attribute-based CUDA function overloading (D12453)
and as such are only enabled when it's in effect (-fcuda-target-overloads).

Differential Revision: http://reviews.llvm.org/D12122

llvm-svn: 248296
This commit is contained in:
Artem Belevich 2015-09-22 17:23:05 +00:00
parent 94a55e8169
commit 9674a64cd9
5 changed files with 60 additions and 4 deletions

View File

@ -81,6 +81,11 @@ public:
return getRecord(ID).Type;
}
/// \brief Return true if this function is a target-specific builtin
bool isTSBuiltin(unsigned ID) const {
return ID >= Builtin::FirstTSBuiltin;
}
/// \brief Return true if this function has no side effects and doesn't
/// read memory.
bool isConst(unsigned ID) const {

View File

@ -529,7 +529,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
// Since the target specific builtins for each arch overlap, only check those
// of the arch we are compiling for.
if (BuiltinID >= Builtin::FirstTSBuiltin) {
if (Context.BuiltinInfo.isTSBuiltin(BuiltinID)) {
switch (Context.getTargetInfo().getTriple().getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:

View File

@ -11290,6 +11290,18 @@ void Sema::AddKnownFunctionAttributes(FunctionDecl *FD) {
FD->addAttr(NoThrowAttr::CreateImplicit(Context, FD->getLocation()));
if (Context.BuiltinInfo.isConst(BuiltinID) && !FD->hasAttr<ConstAttr>())
FD->addAttr(ConstAttr::CreateImplicit(Context, FD->getLocation()));
if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads &&
Context.BuiltinInfo.isTSBuiltin(BuiltinID) &&
!FD->hasAttr<CUDADeviceAttr>() && !FD->hasAttr<CUDAHostAttr>()) {
// Target-specific builtins are assumed to be intended for use
// in this particular CUDA compilation mode and should have
// appropriate attribute set so we can enforce CUDA function
// call restrictions.
if (getLangOpts().CUDAIsDevice)
FD->addAttr(CUDADeviceAttr::CreateImplicit(Context, FD->getLocation()));
else
FD->addAttr(CUDAHostAttr::CreateImplicit(Context, FD->getLocation()));
}
}
IdentifierInfo *Name = FD->getIdentifier();

View File

@ -0,0 +1,36 @@
// Tests that target-specific builtins have appropriate host/device
// attributes and that CUDA call restrictions are enforced. Also
// verify that non-target builtins can be used from both host and
// device functions.
//
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
// RUN: %clang_cc1 -triple x86_64-unknown-unknown \
// RUN: -fcuda-target-overloads -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple nvptx64-unknown-cuda -fcuda-is-device \
// RUN: -fcuda-target-overloads -fsyntax-only -verify %s
#ifdef __CUDA_ARCH__
// Device-side builtins are not allowed to be called from host functions.
void hf() {
int x = __builtin_ptx_read_tid_x(); // expected-note {{'__builtin_ptx_read_tid_x' declared here}}
// expected-error@-1 {{reference to __device__ function '__builtin_ptx_read_tid_x' in __host__ function}}
x = __builtin_abs(1);
}
__attribute__((device)) void df() {
int x = __builtin_ptx_read_tid_x();
x = __builtin_abs(1);
}
#else
// Host-side builtins are not allowed to be called from device functions.
__attribute__((device)) void df() {
int x = __builtin_ia32_rdtsc(); // expected-note {{'__builtin_ia32_rdtsc' declared here}}
// expected-error@-1 {{reference to __host__ function '__builtin_ia32_rdtsc' in __device__ function}}
x = __builtin_abs(1);
}
void hf() {
int x = __builtin_ia32_rdtsc();
x = __builtin_abs(1);
}
#endif

View File

@ -1,10 +1,13 @@
// RUN: %clang_cc1 -std=gnu++11 -triple nvptx64-unknown-unknown -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device \
// RUN: -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device \
// RUN: -fcuda-target-overloads -fsyntax-only -verify %s
#include "Inputs/cuda.h"
// expected-no-diagnostics
__device__ void __threadfence_system() {
// This shouldn't produce an error, since __nvvm_membar_sys is inferred to
// be __host__ __device__ and thus callable from device code.
// This shouldn't produce an error, since __nvvm_membar_sys should be
// __device__ and thus callable from device code.
__nvvm_membar_sys();
}