forked from OSchip/llvm-project
[CUDA] Do not merge CUDA target attributes.
CUDA target attributes are used for function overloading and must not be merged. This fixes a bug where attributes were inherited during function template specialization in CUDA, which made it impossible for a specialized function to provide its own target attributes. Differential Revision: https://reviews.llvm.org/D24522 llvm-svn: 281406
This commit is contained in:
parent
255abad9b1
commit
bed18e9cc4
|
@ -2290,7 +2290,13 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
|
||||||
NewAttr = S.mergeAlwaysInlineAttr(D, AA->getRange(),
|
NewAttr = S.mergeAlwaysInlineAttr(D, AA->getRange(),
|
||||||
&S.Context.Idents.get(AA->getSpelling()),
|
&S.Context.Idents.get(AA->getSpelling()),
|
||||||
AttrSpellingListIndex);
|
AttrSpellingListIndex);
|
||||||
else if (const auto *MA = dyn_cast<MinSizeAttr>(Attr))
|
else if (S.getLangOpts().CUDA && isa<FunctionDecl>(D) &&
|
||||||
|
(isa<CUDAHostAttr>(Attr) || isa<CUDADeviceAttr>(Attr) ||
|
||||||
|
isa<CUDAGlobalAttr>(Attr))) {
|
||||||
|
// CUDA target attributes are part of function signature for
|
||||||
|
// overloading purposes and must not be merged.
|
||||||
|
return false;
|
||||||
|
} else if (const auto *MA = dyn_cast<MinSizeAttr>(Attr))
|
||||||
NewAttr = S.mergeMinSizeAttr(D, MA->getRange(), AttrSpellingListIndex);
|
NewAttr = S.mergeMinSizeAttr(D, MA->getRange(), AttrSpellingListIndex);
|
||||||
else if (const auto *OA = dyn_cast<OptimizeNoneAttr>(Attr))
|
else if (const auto *OA = dyn_cast<OptimizeNoneAttr>(Attr))
|
||||||
NewAttr = S.mergeOptimizeNoneAttr(D, OA->getRange(), AttrSpellingListIndex);
|
NewAttr = S.mergeOptimizeNoneAttr(D, OA->getRange(), AttrSpellingListIndex);
|
||||||
|
|
|
@ -379,3 +379,14 @@ __host__ __device__ void test_host_device_single_side_overloading() {
|
||||||
HostReturnTy ret3 = host_only_function(1);
|
HostReturnTy ret3 = host_only_function(1);
|
||||||
HostReturnTy2 ret4 = host_only_function(1.0f);
|
HostReturnTy2 ret4 = host_only_function(1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify that we allow overloading function templates.
|
||||||
|
template <typename T> __host__ T template_overload(const T &a) { return a; };
|
||||||
|
template <typename T> __device__ T template_overload(const T &a) { return a; };
|
||||||
|
|
||||||
|
__host__ void test_host_template_overload() {
|
||||||
|
template_overload(1); // OK. Attribute-based overloading picks __host__ variant.
|
||||||
|
}
|
||||||
|
__device__ void test_device_template_overload() {
|
||||||
|
template_overload(1); // OK. Attribute-based overloading picks __device__ variant.
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
// Verifies correct inheritance of target attributes during template
|
||||||
|
// instantiation and specialization.
|
||||||
|
|
||||||
|
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
|
||||||
|
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
|
||||||
|
|
||||||
|
#include "Inputs/cuda.h"
|
||||||
|
|
||||||
|
// Function must inherit target attributes during instantiation, but not during
|
||||||
|
// specialization.
|
||||||
|
template <typename T> __host__ __device__ T function_template(const T &a);
|
||||||
|
|
||||||
|
// Specialized functions have their own attributes.
|
||||||
|
// expected-note@+1 {{candidate function not viable: call to __host__ function from __device__ function}}
|
||||||
|
template <> __host__ float function_template<float>(const float &from);
|
||||||
|
|
||||||
|
// expected-note@+1 {{candidate function not viable: call to __device__ function from __host__ function}}
|
||||||
|
template <> __device__ double function_template<double>(const double &from);
|
||||||
|
|
||||||
|
__host__ void hf() {
|
||||||
|
function_template<float>(1.0f); // OK. Specialization is __host__.
|
||||||
|
function_template<double>(2.0); // expected-error {{no matching function for call to 'function_template'}}
|
||||||
|
function_template(1); // OK. Instantiated function template is HD.
|
||||||
|
}
|
||||||
|
__device__ void df() {
|
||||||
|
function_template<float>(3.0f); // expected-error {{no matching function for call to 'function_template'}}
|
||||||
|
function_template<double>(4.0); // OK. Specialization is __device__.
|
||||||
|
function_template(1); // OK. Instantiated function template is HD.
|
||||||
|
}
|
Loading…
Reference in New Issue