forked from OSchip/llvm-project
[CUDA] Make unattributed constexpr functions implicitly host+device.
With this patch, by a constexpr function is implicitly host+device unless: a) it's a variadic function (variadic functions are not allowed on the device side), or b) it's preceeded by a __device__ overload in a system header. The restriction on overloading __host__ __device__ functions on the basis of their CUDA attributes remains in place, but we use (b) to allow us to define __device__ overloads for constexpr functions in cmath, which would otherwise be __host__ __device__ and thus not overloadable. You can disable this behavior with -fno-cuda-host-device-constexpr. Reviewers: tra, rnk, rsmith Subscribers: cfe-commits Differential Revision: http://reviews.llvm.org/D18380 llvm-svn: 264964
This commit is contained in:
parent
0cda764430
commit
ba122ab42f
|
@ -6491,6 +6491,12 @@ def err_variadic_device_fn : Error<
|
||||||
def err_va_arg_in_device : Error<
|
def err_va_arg_in_device : Error<
|
||||||
"CUDA device code does not support va_arg">;
|
"CUDA device code does not support va_arg">;
|
||||||
def err_alias_not_supported_on_nvptx : Error<"CUDA does not support aliases">;
|
def err_alias_not_supported_on_nvptx : Error<"CUDA does not support aliases">;
|
||||||
|
def err_cuda_unattributed_constexpr_cannot_overload_device : Error<
|
||||||
|
"constexpr function '%0' without __host__ or __device__ attributes cannot "
|
||||||
|
"overload __device__ function with same signature. Add a __host__ "
|
||||||
|
"attribute, or build with -fno-cuda-host-device-constexpr.">;
|
||||||
|
def note_cuda_conflicting_device_function_declared_here : Note<
|
||||||
|
"conflicting __device__ function declared here">;
|
||||||
def err_dynamic_var_init : Error<
|
def err_dynamic_var_init : Error<
|
||||||
"dynamic initialization is not supported for "
|
"dynamic initialization is not supported for "
|
||||||
"__device__, __constant__, and __shared__ variables.">;
|
"__device__, __constant__, and __shared__ variables.">;
|
||||||
|
|
|
@ -172,6 +172,7 @@ LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
|
||||||
|
|
||||||
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
|
LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
|
||||||
LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code")
|
LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code")
|
||||||
|
LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__")
|
||||||
|
|
||||||
LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
|
LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
|
||||||
LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")
|
LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")
|
||||||
|
|
|
@ -691,6 +691,8 @@ def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">,
|
||||||
HelpText<"Incorporate CUDA device-side binary into host object file.">;
|
HelpText<"Incorporate CUDA device-side binary into host object file.">;
|
||||||
def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">,
|
def fcuda_allow_variadic_functions : Flag<["-"], "fcuda-allow-variadic-functions">,
|
||||||
HelpText<"Allow variadic functions in CUDA device code.">;
|
HelpText<"Allow variadic functions in CUDA device code.">;
|
||||||
|
def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr">,
|
||||||
|
HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// OpenMP Options
|
// OpenMP Options
|
||||||
|
|
|
@ -2192,7 +2192,8 @@ public:
|
||||||
const LookupResult &OldDecls,
|
const LookupResult &OldDecls,
|
||||||
NamedDecl *&OldDecl,
|
NamedDecl *&OldDecl,
|
||||||
bool IsForUsingDecl);
|
bool IsForUsingDecl);
|
||||||
bool IsOverload(FunctionDecl *New, FunctionDecl *Old, bool IsForUsingDecl);
|
bool IsOverload(FunctionDecl *New, FunctionDecl *Old, bool IsForUsingDecl,
|
||||||
|
bool ConsiderCudaAttrs = true);
|
||||||
|
|
||||||
/// \brief Checks availability of the function depending on the current
|
/// \brief Checks availability of the function depending on the current
|
||||||
/// function context.Inside an unavailable function,unavailability is ignored.
|
/// function context.Inside an unavailable function,unavailability is ignored.
|
||||||
|
@ -8904,6 +8905,11 @@ public:
|
||||||
return IdentifyCUDAPreference(Caller, Callee) == CFP_Never;
|
return IdentifyCUDAPreference(Caller, Callee) == CFP_Never;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
|
||||||
|
/// depending on FD and the current compilation settings.
|
||||||
|
void maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *FD,
|
||||||
|
const LookupResult &Previous);
|
||||||
|
|
||||||
/// Finds a function in \p Matches with highest calling priority
|
/// Finds a function in \p Matches with highest calling priority
|
||||||
/// from \p Caller context and erases all functions with lower
|
/// from \p Caller context and erases all functions with lower
|
||||||
/// calling priority.
|
/// calling priority.
|
||||||
|
|
|
@ -1560,6 +1560,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
|
||||||
if (Args.hasArg(OPT_fcuda_allow_variadic_functions))
|
if (Args.hasArg(OPT_fcuda_allow_variadic_functions))
|
||||||
Opts.CUDAAllowVariadicFunctions = 1;
|
Opts.CUDAAllowVariadicFunctions = 1;
|
||||||
|
|
||||||
|
if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
|
||||||
|
Opts.CUDAHostDeviceConstexpr = 0;
|
||||||
|
|
||||||
if (Opts.ObjC1) {
|
if (Opts.ObjC1) {
|
||||||
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
|
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
|
||||||
StringRef value = arg->getValue();
|
StringRef value = arg->getValue();
|
||||||
|
|
|
@ -11,12 +11,14 @@
|
||||||
///
|
///
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
#include "clang/Sema/Sema.h"
|
|
||||||
#include "clang/AST/ASTContext.h"
|
#include "clang/AST/ASTContext.h"
|
||||||
#include "clang/AST/Decl.h"
|
#include "clang/AST/Decl.h"
|
||||||
#include "clang/AST/ExprCXX.h"
|
#include "clang/AST/ExprCXX.h"
|
||||||
#include "clang/Lex/Preprocessor.h"
|
#include "clang/Lex/Preprocessor.h"
|
||||||
|
#include "clang/Sema/Lookup.h"
|
||||||
|
#include "clang/Sema/Sema.h"
|
||||||
#include "clang/Sema/SemaDiagnostic.h"
|
#include "clang/Sema/SemaDiagnostic.h"
|
||||||
|
#include "clang/Sema/Template.h"
|
||||||
#include "llvm/ADT/Optional.h"
|
#include "llvm/ADT/Optional.h"
|
||||||
#include "llvm/ADT/SmallVector.h"
|
#include "llvm/ADT/SmallVector.h"
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
|
@ -381,3 +383,50 @@ bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// With -fcuda-host-device-constexpr, an unattributed constexpr function is
|
||||||
|
// treated as implicitly __host__ __device__, unless:
|
||||||
|
// * it is a variadic function (device-side variadic functions are not
|
||||||
|
// allowed), or
|
||||||
|
// * a __device__ function with this signature was already declared, in which
|
||||||
|
// case in which case we output an error, unless the __device__ decl is in a
|
||||||
|
// system header, in which case we leave the constexpr function unattributed.
|
||||||
|
void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
|
||||||
|
const LookupResult &Previous) {
|
||||||
|
assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
|
||||||
|
if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
|
||||||
|
NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
|
||||||
|
NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Is D a __device__ function with the same signature as NewD, ignoring CUDA
|
||||||
|
// attributes?
|
||||||
|
auto IsMatchingDeviceFn = [&](NamedDecl *D) {
|
||||||
|
if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
|
||||||
|
D = Using->getTargetDecl();
|
||||||
|
FunctionDecl *OldD = D->getAsFunction();
|
||||||
|
return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
|
||||||
|
!OldD->hasAttr<CUDAHostAttr>() &&
|
||||||
|
!IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
|
||||||
|
/* ConsiderCudaAttrs = */ false);
|
||||||
|
};
|
||||||
|
auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
|
||||||
|
if (It != Previous.end()) {
|
||||||
|
// We found a __device__ function with the same name and signature as NewD
|
||||||
|
// (ignoring CUDA attrs). This is an error unless that function is defined
|
||||||
|
// in a system header, in which case we simply return without making NewD
|
||||||
|
// host+device.
|
||||||
|
NamedDecl *Match = *It;
|
||||||
|
if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
|
||||||
|
Diag(NewD->getLocation(),
|
||||||
|
diag::err_cuda_unattributed_constexpr_cannot_overload_device)
|
||||||
|
<< NewD->getName();
|
||||||
|
Diag(Match->getLocation(),
|
||||||
|
diag::note_cuda_conflicting_device_function_declared_here);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
|
||||||
|
NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
|
||||||
|
}
|
||||||
|
|
|
@ -8009,6 +8009,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
|
||||||
// Handle attributes.
|
// Handle attributes.
|
||||||
ProcessDeclAttributes(S, NewFD, D);
|
ProcessDeclAttributes(S, NewFD, D);
|
||||||
|
|
||||||
|
if (getLangOpts().CUDA)
|
||||||
|
maybeAddCUDAHostDeviceAttrs(S, NewFD, Previous);
|
||||||
|
|
||||||
if (getLangOpts().OpenCL) {
|
if (getLangOpts().OpenCL) {
|
||||||
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return
|
// OpenCL v1.1 s6.5: Using an address space qualifier in a function return
|
||||||
// type declaration will generate a compilation error.
|
// type declaration will generate a compilation error.
|
||||||
|
|
|
@ -992,7 +992,7 @@ Sema::CheckOverload(Scope *S, FunctionDecl *New, const LookupResult &Old,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
|
bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
|
||||||
bool UseMemberUsingDeclRules) {
|
bool UseMemberUsingDeclRules, bool ConsiderCudaAttrs) {
|
||||||
// C++ [basic.start.main]p2: This function shall not be overloaded.
|
// C++ [basic.start.main]p2: This function shall not be overloaded.
|
||||||
if (New->isMain())
|
if (New->isMain())
|
||||||
return false;
|
return false;
|
||||||
|
@ -1125,7 +1125,7 @@ bool Sema::IsOverload(FunctionDecl *New, FunctionDecl *Old,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getLangOpts().CUDA) {
|
if (getLangOpts().CUDA && ConsiderCudaAttrs) {
|
||||||
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
|
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
|
||||||
OldTarget = IdentifyCUDATarget(Old);
|
OldTarget = IdentifyCUDATarget(Old);
|
||||||
if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)
|
if (NewTarget == CFT_InvalidTarget || NewTarget == CFT_Global)
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
// This header is used by tests which are interested in __device__ functions
|
||||||
|
// which appear in a system header.
|
||||||
|
|
||||||
|
__device__ int OverloadMe();
|
||||||
|
|
||||||
|
namespace ns {
|
||||||
|
using ::OverloadMe;
|
||||||
|
}
|
|
@ -0,0 +1,69 @@
|
||||||
|
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s
|
||||||
|
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -isystem %S/Inputs %s -fcuda-is-device
|
||||||
|
|
||||||
|
#include "Inputs/cuda.h"
|
||||||
|
|
||||||
|
// Declares one function and pulls it into namespace ns:
|
||||||
|
//
|
||||||
|
// __device__ int OverloadMe();
|
||||||
|
// namespace ns { using ::OverloadMe; }
|
||||||
|
//
|
||||||
|
// Clang cares that this is done in a system header.
|
||||||
|
#include <overload.h>
|
||||||
|
|
||||||
|
// Opaque type used to determine which overload we're invoking.
|
||||||
|
struct HostReturnTy {};
|
||||||
|
|
||||||
|
// These shouldn't become host+device because they already have attributes.
|
||||||
|
__host__ constexpr int HostOnly() { return 0; }
|
||||||
|
// expected-note@-1 0+ {{not viable}}
|
||||||
|
__device__ constexpr int DeviceOnly() { return 0; }
|
||||||
|
// expected-note@-1 0+ {{not viable}}
|
||||||
|
|
||||||
|
constexpr int HostDevice() { return 0; }
|
||||||
|
|
||||||
|
// This should be a host-only function, because there's a previous __device__
|
||||||
|
// overload in <overload.h>.
|
||||||
|
constexpr HostReturnTy OverloadMe() { return HostReturnTy(); }
|
||||||
|
|
||||||
|
namespace ns {
|
||||||
|
// The "using" statement in overload.h should prevent OverloadMe from being
|
||||||
|
// implicitly host+device.
|
||||||
|
constexpr HostReturnTy OverloadMe() { return HostReturnTy(); }
|
||||||
|
} // namespace ns
|
||||||
|
|
||||||
|
// This is an error, because NonSysHdrOverload was not defined in a system
|
||||||
|
// header.
|
||||||
|
__device__ int NonSysHdrOverload() { return 0; }
|
||||||
|
// expected-note@-1 {{conflicting __device__ function declared here}}
|
||||||
|
constexpr int NonSysHdrOverload() { return 0; }
|
||||||
|
// expected-error@-1 {{constexpr function 'NonSysHdrOverload' without __host__ or __device__ attributes}}
|
||||||
|
|
||||||
|
// Variadic device functions are not allowed, so this is just treated as
|
||||||
|
// host-only.
|
||||||
|
constexpr void Variadic(const char*, ...);
|
||||||
|
// expected-note@-1 {{call to __host__ function from __device__ function}}
|
||||||
|
|
||||||
|
__host__ void HostFn() {
|
||||||
|
HostOnly();
|
||||||
|
DeviceOnly(); // expected-error {{no matching function}}
|
||||||
|
HostReturnTy x = OverloadMe();
|
||||||
|
HostReturnTy y = ns::OverloadMe();
|
||||||
|
Variadic("abc", 42);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ void DeviceFn() {
|
||||||
|
HostOnly(); // expected-error {{no matching function}}
|
||||||
|
DeviceOnly();
|
||||||
|
int x = OverloadMe();
|
||||||
|
int y = ns::OverloadMe();
|
||||||
|
Variadic("abc", 42); // expected-error {{no matching function}}
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ void HostDeviceFn() {
|
||||||
|
#ifdef __CUDA_ARCH__
|
||||||
|
int y = OverloadMe();
|
||||||
|
#else
|
||||||
|
constexpr HostReturnTy y = OverloadMe();
|
||||||
|
#endif
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -verify %s
|
||||||
|
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fno-cuda-host-device-constexpr -fcuda-is-device -verify %s
|
||||||
|
|
||||||
|
#include "Inputs/cuda.h"
|
||||||
|
|
||||||
|
// Check that, with -fno-cuda-host-device-constexpr, constexpr functions are
|
||||||
|
// host-only, and __device__ constexpr functions are still device-only.
|
||||||
|
|
||||||
|
constexpr int f() { return 0; } // expected-note {{not viable}}
|
||||||
|
__device__ constexpr int g() { return 0; } // expected-note {{not viable}}
|
||||||
|
|
||||||
|
void __device__ foo() {
|
||||||
|
f(); // expected-error {{no matching function}}
|
||||||
|
g();
|
||||||
|
}
|
||||||
|
|
||||||
|
void __host__ foo() {
|
||||||
|
f();
|
||||||
|
g(); // expected-error {{no matching function}}
|
||||||
|
}
|
Loading…
Reference in New Issue