[CUDA] Make unattributed constexpr functions implicitly host+device.

With this patch, by a constexpr function is implicitly host+device
unless:

 a) it's a variadic function (variadic functions are not allowed on the
    device side), or
 b) it's preceeded by a __device__ overload in a system header.

The restriction on overloading __host__ __device__ functions on the
basis of their CUDA attributes remains in place, but we use (b) to allow
us to define __device__ overloads for constexpr functions in cmath,
which would otherwise be __host__ __device__ and thus not overloadable.

You can disable this behavior with -fno-cuda-host-device-constexpr.

Reviewers: tra, rnk, rsmith

Subscribers: cfe-commits

Differential Revision: http://reviews.llvm.org/D18380

llvm-svn: 264964
This commit is contained in:
Justin Lebar 2016-03-30 23:30:21 +00:00
parent 0cda764430
commit ba122ab42f
11 changed files with 171 additions and 4 deletions

View File

@ -6491,6 +6491,12 @@ def err_variadic_device_fn : Error<
def err_va_arg_in_device : Error<
"CUDA device code does not support va_arg">;
def err_alias_not_supported_on_nvptx : Error<"CUDA does not support aliases">;
def err_cuda_unattributed_constexpr_cannot_overload_device : Error<
"constexpr function '%0' without __host__ or __device__ attributes cannot "
"overload __device__ function with same signature. Add a __host__ "
"attribute, or build with -fno-cuda-host-device-constexpr.">;
def note_cuda_conflicting_device_function_declared_here : Note<
"conflicting __device__ function declared here">;
def err_dynamic_var_init : Error<
"dynamic initialization is not supported for "
"__device__, __constant__, and __shared__ variables.">;

View File

@ -172,6 +172,7 @@ LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code")
LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__")
LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")

View File

@ -691,6 +691,8 @@ def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">,
HelpText<"Incorporate CUDA device-side binary into host object file.">;
def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">,
HelpText<"Allow variadic functions in CUDA device code.">;
def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr">,
HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">;
//===----------------------------------------------------------------------===//
// OpenMP Options

View File

@ -2192,7 +2192,8 @@ public:
const LookupResult &OldDecls,
NamedDecl *&OldDecl,
bool IsForUsingDecl);
bool IsOverload(FunctionDecl *New, FunctionDecl *Old, bool IsForUsingDecl);
bool IsOverload(FunctionDecl *New, FunctionDecl *Old, bool IsForUsingDecl,
bool ConsiderCudaAttrs = true);
/// \brief Checks availability of the function depending on the current
/// function context.Inside an unavailable function,unavailability is ignored.
@ -8904,6 +8905,11 @@ public:
return IdentifyCUDAPreference(Caller, Callee) == CFP_Never;
}
/// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
/// depending on FD and the current compilation settings.
void maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *FD,
const LookupResult &Previous);
/// Finds a function in \p Matches with highest calling priority
/// from \p Caller context and erases all functions with lower
/// calling priority.

View File

@ -1560,6 +1560,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
if (Args.hasArg(OPT_fcuda_allow_variadic_functions))
Opts.CUDAAllowVariadicFunctions = 1;
if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
Opts.CUDAHostDeviceConstexpr = 0;
if (Opts.ObjC1) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
StringRef value = arg->getValue();

View File

@ -11,12 +11,14 @@
///
//===----------------------------------------------------------------------===//
#include "clang/Sema/Sema.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;
@ -381,3 +383,50 @@ bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
return true;
}
// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
// allowed), or
// * a __device__ function with this signature was already declared, in which
// case in which case we output an error, unless the __device__ decl is in a
// system header, in which case we leave the constexpr function unattributed.
void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
const LookupResult &Previous) {
assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
return;
// Is D a __device__ function with the same signature as NewD, ignoring CUDA
// attributes?
auto IsMatchingDeviceFn = [&](NamedDecl *D) {
if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
D = Using->getTargetDecl();
FunctionDecl *OldD = D->getAsFunction();
return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
!OldD->hasAttr<CUDAHostAttr>() &&
!IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
/* ConsiderCudaAttrs = */ false);
};
auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
if (It != Previous.end()) {
// We found a __device__ function with the same name and signature as NewD
// (ignoring CUDA attrs). This is an error unless that function is defined
// in a system header, in which case we simply return without making NewD
// host+device.
NamedDecl *Match = *It;
if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
Diag(NewD->getLocation(),
diag::err_cuda_unattributed_constexpr_cannot_overload_device)
<< NewD->getName();
Diag(Match->getLocation(),
diag::note_cuda_conflicting_device_function_declared_here);
}
return;
}
NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}

View File

@ -8009,6 +8009,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
// Handle attributes.
ProcessDeclAttributes(S, NewFD, D);
if (getLangOpts().CUDA)
maybeAddCUDAHostDeviceAttrs(S, NewFD, Previous);
if (getLangOpts().OpenCL) {
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return
// type declaration will generate a compilation error.

View File

@ -992,7 +992,7 @@ Sema::CheckOverload(Scope *S, FunctionDecl *New, const LookupResult &Old,
}
bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
bool UseMemberUsingDeclRules) {
bool UseMemberUsingDeclRules, bool ConsiderCudaAttrs) {
// C++ [basic.start.main]p2: This function shall not be overloaded.
if (New->isMain())
return false;
@ -1125,7 +1125,7 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
return true;
}
if (getLangOpts().CUDA) {
if (getLangOpts().CUDA && ConsiderCudaAttrs) {
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
OldTarget = IdentifyCUDATarget(Old);
if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)

View File

@ -0,0 +1,8 @@
// This header is used by tests which are interested in __device__ functions
// which appear in a system header.
__device__ int OverloadMe();
namespace ns {
using ::OverloadMe;
}

View File

@ -0,0 +1,69 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -fcuda-is-device
#include "Inputs/cuda.h"
// Declares one function and pulls it into namespace ns:
//
// __device__ int OverloadMe();
// namespace ns { using ::OverloadMe; }
//
// Clang cares that this is done in a system header.
#include <overload.h>
// Opaque type used to determine which overload we're invoking.
struct HostReturnTy {};
// These shouldn't become host+device because they already have attributes.
__host__ constexpr int HostOnly() { return 0; }
// expected-note@-1 0+ {{not viable}}
__device__ constexpr int DeviceOnly() { return 0; }
// expected-note@-1 0+ {{not viable}}
constexpr int HostDevice() { return 0; }
// This should be a host-only function, because there's a previous __device__
// overload in <overload.h>.
constexpr HostReturnTy OverloadMe() { return HostReturnTy(); }
namespace ns {
// The "using" statement in overload.h should prevent OverloadMe from being
// implicitly host+device.
constexpr HostReturnTy OverloadMe() { return HostReturnTy(); }
} // namespace ns
// This is an error, because NonSysHdrOverload was not defined in a system
// header.
__device__ int NonSysHdrOverload() { return 0; }
// expected-note@-1 {{conflicting __device__ function declared here}}
constexpr int NonSysHdrOverload() { return 0; }
// expected-error@-1 {{constexpr function 'NonSysHdrOverload' without __host__ or __device__ attributes}}
// Variadic device functions are not allowed, so this is just treated as
// host-only.
constexpr void Variadic(const char*, ...);
// expected-note@-1 {{call to __host__ function from __device__ function}}
__host__ void HostFn() {
HostOnly();
DeviceOnly(); // expected-error {{no matching function}}
HostReturnTy x = OverloadMe();
HostReturnTy y = ns::OverloadMe();
Variadic("abc", 42);
}
__device__ void DeviceFn() {
HostOnly(); // expected-error {{no matching function}}
DeviceOnly();
int x = OverloadMe();
int y = ns::OverloadMe();
Variadic("abc", 42); // expected-error {{no matching function}}
}
__host__ __device__ void HostDeviceFn() {
#ifdef __CUDA_ARCH__
int y = OverloadMe();
#else
constexpr HostReturnTy y = OverloadMe();
#endif
}

View File

@ -0,0 +1,20 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -verify %s
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -fcuda-is-device -verify %s
#include "Inputs/cuda.h"
// Check that, with -fno-cuda-host-device-constexpr, constexpr functions are
// host-only, and __device__ constexpr functions are still device-only.
constexpr int f() { return 0; } // expected-note {{not viable}}
__device__ constexpr int g() { return 0; } // expected-note {{not viable}}
void __device__ foo() {
f(); // expected-error {{no matching function}}
g();
}
void __host__ foo() {
f();
g(); // expected-error {{no matching function}}
}