[CUDA] Make unattributed constexpr functions implicitly host+device.

With this patch, by a constexpr function is implicitly host+device
unless:

 a) it's a variadic function (variadic functions are not allowed on the
    device side), or
 b) it's preceeded by a __device__ overload in a system header.

The restriction on overloading __host__ __device__ functions on the
basis of their CUDA attributes remains in place, but we use (b) to allow
us to define __device__ overloads for constexpr functions in cmath,
which would otherwise be __host__ __device__ and thus not overloadable.

You can disable this behavior with -fno-cuda-host-device-constexpr.

Reviewers: tra, rnk, rsmith

Subscribers: cfe-commits

Differential Revision: http://reviews.llvm.org/D18380

llvm-svn: 264964
This commit is contained in:
Justin Lebar 2016-03-30 23:30:21 +00:00
parent 0cda764430
commit ba122ab42f
11 changed files with 171 additions and 4 deletions

View File

@ -6491,6 +6491,12 @@ def err_variadic_device_fn : Error<
def err_va_arg_in_device : Error< def err_va_arg_in_device : Error<
"CUDA device code does not support va_arg">; "CUDA device code does not support va_arg">;
def err_alias_not_supported_on_nvptx : Error<"CUDA does not support aliases">; def err_alias_not_supported_on_nvptx : Error<"CUDA does not support aliases">;
def err_cuda_unattributed_constexpr_cannot_overload_device : Error<
"constexpr function '%0' without __host__ or __device__ attributes cannot "
"overload __device__ function with same signature. Add a __host__ "
"attribute, or build with -fno-cuda-host-device-constexpr.">;
def note_cuda_conflicting_device_function_declared_here : Note<
"conflicting __device__ function declared here">;
def err_dynamic_var_init : Error< def err_dynamic_var_init : Error<
"dynamic initialization is not supported for " "dynamic initialization is not supported for "
"__device__, __constant__, and __shared__ variables.">; "__device__, __constant__, and __shared__ variables.">;

View File

@ -172,6 +172,7 @@ LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device") LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code") LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code")
LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__")
LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators") LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions") LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")

View File

@ -691,6 +691,8 @@ def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">,
HelpText<"Incorporate CUDA device-side binary into host object file.">; HelpText<"Incorporate CUDA device-side binary into host object file.">;
def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">, def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">,
HelpText<"Allow variadic functions in CUDA device code.">; HelpText<"Allow variadic functions in CUDA device code.">;
def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr">,
HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// OpenMP Options // OpenMP Options

View File

@ -2192,7 +2192,8 @@ public:
const LookupResult &OldDecls, const LookupResult &OldDecls,
NamedDecl *&OldDecl, NamedDecl *&OldDecl,
bool IsForUsingDecl); bool IsForUsingDecl);
bool IsOverload(FunctionDecl *New, FunctionDecl *Old, bool IsForUsingDecl); bool IsOverload(FunctionDecl *New, FunctionDecl *Old, bool IsForUsingDecl,
bool ConsiderCudaAttrs = true);
/// \brief Checks availability of the function depending on the current /// \brief Checks availability of the function depending on the current
/// function context.Inside an unavailable function,unavailability is ignored. /// function context.Inside an unavailable function,unavailability is ignored.
@ -8904,6 +8905,11 @@ public:
return IdentifyCUDAPreference(Caller, Callee) == CFP_Never; return IdentifyCUDAPreference(Caller, Callee) == CFP_Never;
} }
/// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
/// depending on FD and the current compilation settings.
void maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *FD,
const LookupResult &Previous);
/// Finds a function in \p Matches with highest calling priority /// Finds a function in \p Matches with highest calling priority
/// from \p Caller context and erases all functions with lower /// from \p Caller context and erases all functions with lower
/// calling priority. /// calling priority.

View File

@ -1560,6 +1560,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
if (Args.hasArg(OPT_fcuda_allow_variadic_functions)) if (Args.hasArg(OPT_fcuda_allow_variadic_functions))
Opts.CUDAAllowVariadicFunctions = 1; Opts.CUDAAllowVariadicFunctions = 1;
if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
Opts.CUDAHostDeviceConstexpr = 0;
if (Opts.ObjC1) { if (Opts.ObjC1) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
StringRef value = arg->getValue(); StringRef value = arg->getValue();

View File

@ -11,12 +11,14 @@
/// ///
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "clang/Sema/Sema.h"
#include "clang/AST/ASTContext.h" #include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h" #include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h" #include "clang/AST/ExprCXX.h"
#include "clang/Lex/Preprocessor.h" #include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h" #include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallVector.h"
using namespace clang; using namespace clang;
@ -381,3 +383,50 @@ bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
return true; return true;
} }
// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
// allowed), or
// * a __device__ function with this signature was already declared, in which
// case in which case we output an error, unless the __device__ decl is in a
// system header, in which case we leave the constexpr function unattributed.
void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
const LookupResult &Previous) {
assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
return;
// Is D a __device__ function with the same signature as NewD, ignoring CUDA
// attributes?
auto IsMatchingDeviceFn = [&](NamedDecl *D) {
if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
D = Using->getTargetDecl();
FunctionDecl *OldD = D->getAsFunction();
return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
!OldD->hasAttr<CUDAHostAttr>() &&
!IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
/* ConsiderCudaAttrs = */ false);
};
auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
if (It != Previous.end()) {
// We found a __device__ function with the same name and signature as NewD
// (ignoring CUDA attrs). This is an error unless that function is defined
// in a system header, in which case we simply return without making NewD
// host+device.
NamedDecl *Match = *It;
if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
Diag(NewD->getLocation(),
diag::err_cuda_unattributed_constexpr_cannot_overload_device)
<< NewD->getName();
Diag(Match->getLocation(),
diag::note_cuda_conflicting_device_function_declared_here);
}
return;
}
NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

View File

@ -8009,6 +8009,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// Handle attributes. // Handle attributes.
ProcessDeclAttributes(S, NewFD, D); ProcessDeclAttributes(S, NewFD, D);
if (getLangOpts().CUDA)
maybeAddCUDAHostDeviceAttrs(S, NewFD, Previous);
if (getLangOpts().OpenCL) { if (getLangOpts().OpenCL) {
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return // OpenCL v1.1 s6.5: Using an address space qualifier in a function return
// type declaration will generate a compilation error. // type declaration will generate a compilation error.

View File

@ -992,7 +992,7 @@ Sema::CheckOverload(Scope *S, FunctionDecl *New, const LookupResult &Old,
} }
bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old, bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
bool UseMemberUsingDeclRules) { bool UseMemberUsingDeclRules, bool ConsiderCudaAttrs) {
// C++ [basic.start.main]p2: This function shall not be overloaded. // C++ [basic.start.main]p2: This function shall not be overloaded.
if (New->isMain()) if (New->isMain())
return false; return false;
@ -1125,7 +1125,7 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
return true; return true;
} }
if (getLangOpts().CUDA) { if (getLangOpts().CUDA && ConsiderCudaAttrs) {
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New), CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
OldTarget = IdentifyCUDATarget(Old); OldTarget = IdentifyCUDATarget(Old);
if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global) if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)

View File

@ -0,0 +1,8 @@
// This header is used by tests which are interested in __device__ functions
// which appear in a system header.
__device__ int OverloadMe();
namespace ns {
using ::OverloadMe;
}

View File

@ -0,0 +1,69 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -fcuda-is-device
#include "Inputs/cuda.h"
// Declares one function and pulls it into namespace ns:
//
// __device__ int OverloadMe();
// namespace ns { using ::OverloadMe; }
//
// Clang cares that this is done in a system header.
#include <overload.h>
// Opaque type used to determine which overload we're invoking.
struct HostReturnTy {};
// These shouldn't become host+device because they already have attributes.
__host__ constexpr int HostOnly() { return 0; }
// expected-note@-1 0+ {{not viable}}
__device__ constexpr int DeviceOnly() { return 0; }
// expected-note@-1 0+ {{not viable}}
constexpr int HostDevice() { return 0; }
// This should be a host-only function, because there's a previous __device__
// overload in <overload.h>.
constexpr HostReturnTy OverloadMe() { return HostReturnTy(); }
namespace ns {
// The "using" statement in overload.h should prevent OverloadMe from being
// implicitly host+device.
constexpr HostReturnTy OverloadMe() { return HostReturnTy(); }
} // namespace ns
// This is an error, because NonSysHdrOverload was not defined in a system
// header.
__device__ int NonSysHdrOverload() { return 0; }
// expected-note@-1 {{conflicting __device__ function declared here}}
constexpr int NonSysHdrOverload() { return 0; }
// expected-error@-1 {{constexpr function 'NonSysHdrOverload' without __host__ or __device__ attributes}}
// Variadic device functions are not allowed, so this is just treated as
// host-only.
constexpr void Variadic(const char*, ...);
// expected-note@-1 {{call to __host__ function from __device__ function}}
__host__ void HostFn() {
HostOnly();
DeviceOnly(); // expected-error {{no matching function}}
HostReturnTy x = OverloadMe();
HostReturnTy y = ns::OverloadMe();
Variadic("abc", 42);
}
__device__ void DeviceFn() {
HostOnly(); // expected-error {{no matching function}}
DeviceOnly();
int x = OverloadMe();
int y = ns::OverloadMe();
Variadic("abc", 42); // expected-error {{no matching function}}
}
__host__ __device__ void HostDeviceFn() {
#ifdef __CUDA_ARCH__
int y = OverloadMe();
#else
constexpr HostReturnTy y = OverloadMe();
#endif
}

View File

@ -0,0 +1,20 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -verify %s
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -fcuda-is-device -verify %s
#include "Inputs/cuda.h"
// Check that, with -fno-cuda-host-device-constexpr, constexpr functions are
// host-only, and __device__ constexpr functions are still device-only.
constexpr int f() { return 0; } // expected-note {{not viable}}
__device__ constexpr int g() { return 0; } // expected-note {{not viable}}
void __device__ foo() {
f(); // expected-error {{no matching function}}
g();
}
void __host__ foo() {
f();
g(); // expected-error {{no matching function}}
}