forked from OSchip/llvm-project
recommit c77a4078e0
This commit is contained in:
parent
a8e5dcb072
commit
b46b1a916d
|
@ -9374,16 +9374,22 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1,
|
||||||
return Comparison::Equal;
|
return Comparison::Equal;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
|
static Comparison
|
||||||
const OverloadCandidate &Cand2) {
|
isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
|
||||||
|
const OverloadCandidate &Cand2) {
|
||||||
if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function ||
|
if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function ||
|
||||||
!Cand2.Function->isMultiVersion())
|
!Cand2.Function->isMultiVersion())
|
||||||
return false;
|
return Comparison::Equal;
|
||||||
|
|
||||||
// If Cand1 is invalid, it cannot be a better match, if Cand2 is invalid, this
|
// If both are invalid, they are equal. If one of them is invalid, the other
|
||||||
// is obviously better.
|
// is better.
|
||||||
if (Cand1.Function->isInvalidDecl()) return false;
|
if (Cand1.Function->isInvalidDecl()) {
|
||||||
if (Cand2.Function->isInvalidDecl()) return true;
|
if (Cand2.Function->isInvalidDecl())
|
||||||
|
return Comparison::Equal;
|
||||||
|
return Comparison::Worse;
|
||||||
|
}
|
||||||
|
if (Cand2.Function->isInvalidDecl())
|
||||||
|
return Comparison::Better;
|
||||||
|
|
||||||
// If this is a cpu_dispatch/cpu_specific multiversion situation, prefer
|
// If this is a cpu_dispatch/cpu_specific multiversion situation, prefer
|
||||||
// cpu_dispatch, else arbitrarily based on the identifiers.
|
// cpu_dispatch, else arbitrarily based on the identifiers.
|
||||||
|
@ -9393,16 +9399,18 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
|
||||||
const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>();
|
const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>();
|
||||||
|
|
||||||
if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec)
|
if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec)
|
||||||
return false;
|
return Comparison::Equal;
|
||||||
|
|
||||||
if (Cand1CPUDisp && !Cand2CPUDisp)
|
if (Cand1CPUDisp && !Cand2CPUDisp)
|
||||||
return true;
|
return Comparison::Better;
|
||||||
if (Cand2CPUDisp && !Cand1CPUDisp)
|
if (Cand2CPUDisp && !Cand1CPUDisp)
|
||||||
return false;
|
return Comparison::Worse;
|
||||||
|
|
||||||
if (Cand1CPUSpec && Cand2CPUSpec) {
|
if (Cand1CPUSpec && Cand2CPUSpec) {
|
||||||
if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size())
|
if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size())
|
||||||
return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size();
|
return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size()
|
||||||
|
? Comparison::Better
|
||||||
|
: Comparison::Worse;
|
||||||
|
|
||||||
std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator>
|
std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator>
|
||||||
FirstDiff = std::mismatch(
|
FirstDiff = std::mismatch(
|
||||||
|
@ -9415,7 +9423,9 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
|
||||||
assert(FirstDiff.first != Cand1CPUSpec->cpus_end() &&
|
assert(FirstDiff.first != Cand1CPUSpec->cpus_end() &&
|
||||||
"Two different cpu-specific versions should not have the same "
|
"Two different cpu-specific versions should not have the same "
|
||||||
"identifier list, otherwise they'd be the same decl!");
|
"identifier list, otherwise they'd be the same decl!");
|
||||||
return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName();
|
return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName()
|
||||||
|
? Comparison::Better
|
||||||
|
: Comparison::Worse;
|
||||||
}
|
}
|
||||||
llvm_unreachable("No way to get here unless both had cpu_dispatch");
|
llvm_unreachable("No way to get here unless both had cpu_dispatch");
|
||||||
}
|
}
|
||||||
|
@ -9475,6 +9485,50 @@ bool clang::isBetterOverloadCandidate(
|
||||||
else if (!Cand1.Viable)
|
else if (!Cand1.Viable)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// [CUDA] A function with 'never' preference is marked not viable, therefore
|
||||||
|
// is never shown up here. The worst preference shown up here is 'wrong side',
|
||||||
|
// e.g. a host function called by a device host function in device
|
||||||
|
// compilation. This is valid AST as long as the host device function is not
|
||||||
|
// emitted, e.g. it is an inline function which is called only by a host
|
||||||
|
// function. A deferred diagnostic will be triggered if it is emitted.
|
||||||
|
// However a wrong-sided function is still a viable candidate here.
|
||||||
|
//
|
||||||
|
// If Cand1 can be emitted and Cand2 cannot be emitted in the current
|
||||||
|
// context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2
|
||||||
|
// can be emitted, Cand1 is not better than Cand2. This rule should have
|
||||||
|
// precedence over other rules.
|
||||||
|
//
|
||||||
|
// If both Cand1 and Cand2 can be emitted, or neither can be emitted, then
|
||||||
|
// other rules should be used to determine which is better. This is because
|
||||||
|
// host/device based overloading resolution is mostly for determining
|
||||||
|
// viability of a function. If two functions are both viable, other factors
|
||||||
|
// should take precedence in preference, e.g. the standard-defined preferences
|
||||||
|
// like argument conversion ranks or enable_if partial-ordering. The
|
||||||
|
// preference for pass-object-size parameters is probably most similar to a
|
||||||
|
// type-based-overloading decision and so should take priority.
|
||||||
|
//
|
||||||
|
// If other rules cannot determine which is better, CUDA preference will be
|
||||||
|
// used again to determine which is better.
|
||||||
|
//
|
||||||
|
// TODO: Currently IdentifyCUDAPreference does not return correct values
|
||||||
|
// for functions called in global variable initializers due to missing
|
||||||
|
// correct context about device/host. Therefore we can only enforce this
|
||||||
|
// rule when there is a caller. We should enforce this rule for functions
|
||||||
|
// in global variable initializers once proper context is added.
|
||||||
|
if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
|
||||||
|
if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
|
||||||
|
auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
|
||||||
|
auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
|
||||||
|
assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
|
||||||
|
auto Cand1Emittable = P1 > Sema::CFP_WrongSide;
|
||||||
|
auto Cand2Emittable = P2 > Sema::CFP_WrongSide;
|
||||||
|
if (Cand1Emittable && !Cand2Emittable)
|
||||||
|
return true;
|
||||||
|
if (!Cand1Emittable && Cand2Emittable)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// C++ [over.match.best]p1:
|
// C++ [over.match.best]p1:
|
||||||
//
|
//
|
||||||
// -- if F is a static member function, ICS1(F) is defined such
|
// -- if F is a static member function, ICS1(F) is defined such
|
||||||
|
@ -9709,12 +9763,6 @@ bool clang::isBetterOverloadCandidate(
|
||||||
return Cmp == Comparison::Better;
|
return Cmp == Comparison::Better;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
|
|
||||||
FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
|
|
||||||
return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
|
|
||||||
S.IdentifyCUDAPreference(Caller, Cand2.Function);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool HasPS1 = Cand1.Function != nullptr &&
|
bool HasPS1 = Cand1.Function != nullptr &&
|
||||||
functionHasPassObjectSizeParams(Cand1.Function);
|
functionHasPassObjectSizeParams(Cand1.Function);
|
||||||
bool HasPS2 = Cand2.Function != nullptr &&
|
bool HasPS2 = Cand2.Function != nullptr &&
|
||||||
|
@ -9722,7 +9770,22 @@ bool clang::isBetterOverloadCandidate(
|
||||||
if (HasPS1 != HasPS2 && HasPS1)
|
if (HasPS1 != HasPS2 && HasPS1)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return isBetterMultiversionCandidate(Cand1, Cand2);
|
auto MV = isBetterMultiversionCandidate(Cand1, Cand2);
|
||||||
|
if (MV == Comparison::Better)
|
||||||
|
return true;
|
||||||
|
if (MV == Comparison::Worse)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// If other rules cannot determine which is better, CUDA preference is used
|
||||||
|
// to determine which is better.
|
||||||
|
if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
|
||||||
|
if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
|
||||||
|
return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
|
||||||
|
S.IdentifyCUDAPreference(Caller, Cand2.Function);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine whether two declarations are "equivalent" for the purposes of
|
/// Determine whether two declarations are "equivalent" for the purposes of
|
||||||
|
@ -9808,33 +9871,6 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
|
||||||
std::transform(begin(), end(), std::back_inserter(Candidates),
|
std::transform(begin(), end(), std::back_inserter(Candidates),
|
||||||
[](OverloadCandidate &Cand) { return &Cand; });
|
[](OverloadCandidate &Cand) { return &Cand; });
|
||||||
|
|
||||||
// [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but
|
|
||||||
// are accepted by both clang and NVCC. However, during a particular
|
|
||||||
// compilation mode only one call variant is viable. We need to
|
|
||||||
// exclude non-viable overload candidates from consideration based
|
|
||||||
// only on their host/device attributes. Specifically, if one
|
|
||||||
// candidate call is WrongSide and the other is SameSide, we ignore
|
|
||||||
// the WrongSide candidate.
|
|
||||||
if (S.getLangOpts().CUDA) {
|
|
||||||
const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
|
|
||||||
bool ContainsSameSideCandidate =
|
|
||||||
llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
|
|
||||||
// Check viable function only.
|
|
||||||
return Cand->Viable && Cand->Function &&
|
|
||||||
S.IdentifyCUDAPreference(Caller, Cand->Function) ==
|
|
||||||
Sema::CFP_SameSide;
|
|
||||||
});
|
|
||||||
if (ContainsSameSideCandidate) {
|
|
||||||
auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
|
|
||||||
// Check viable function only to avoid unnecessary data copying/moving.
|
|
||||||
return Cand->Viable && Cand->Function &&
|
|
||||||
S.IdentifyCUDAPreference(Caller, Cand->Function) ==
|
|
||||||
Sema::CFP_WrongSide;
|
|
||||||
};
|
|
||||||
llvm::erase_if(Candidates, IsWrongSideCandidate);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find the best viable function.
|
// Find the best viable function.
|
||||||
Best = end();
|
Best = end();
|
||||||
for (auto *Cand : Candidates) {
|
for (auto *Cand : Candidates) {
|
||||||
|
|
|
@ -331,9 +331,6 @@ __device__ void test_device_calls_template_fn() {
|
||||||
// If we have a mix of HD and H-only or D-only candidates in the overload set,
|
// If we have a mix of HD and H-only or D-only candidates in the overload set,
|
||||||
// normal C++ overload resolution rules apply first.
|
// normal C++ overload resolution rules apply first.
|
||||||
template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
|
template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
|
||||||
#ifdef __CUDA_ARCH__
|
|
||||||
//expected-note@-2 {{declared here}}
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
return TemplateReturnTy();
|
return TemplateReturnTy();
|
||||||
}
|
}
|
||||||
|
@ -342,11 +339,13 @@ __host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) {
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ __device__ void test_host_device_calls_hd_template() {
|
__host__ __device__ void test_host_device_calls_hd_template() {
|
||||||
HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
|
|
||||||
TemplateReturnTy ret2 = template_vs_hd_function(1);
|
|
||||||
#ifdef __CUDA_ARCH__
|
#ifdef __CUDA_ARCH__
|
||||||
// expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
|
typedef HostDeviceReturnTy ExpectedReturnTy;
|
||||||
|
#else
|
||||||
|
typedef TemplateReturnTy ExpectedReturnTy;
|
||||||
#endif
|
#endif
|
||||||
|
HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
|
||||||
|
ExpectedReturnTy ret2 = template_vs_hd_function(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void test_host_calls_hd_template() {
|
__host__ void test_host_calls_hd_template() {
|
||||||
|
@ -367,14 +366,14 @@ __device__ void test_device_calls_hd_template() {
|
||||||
__device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); }
|
__device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); }
|
||||||
__device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); }
|
__device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); }
|
||||||
#ifndef __CUDA_ARCH__
|
#ifndef __CUDA_ARCH__
|
||||||
// expected-note@-3 {{'device_only_function' declared here}}
|
// expected-note@-3 2{{'device_only_function' declared here}}
|
||||||
// expected-note@-3 {{'device_only_function' declared here}}
|
// expected-note@-3 2{{'device_only_function' declared here}}
|
||||||
#endif
|
#endif
|
||||||
__host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
|
__host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
|
||||||
__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }
|
__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }
|
||||||
#ifdef __CUDA_ARCH__
|
#ifdef __CUDA_ARCH__
|
||||||
// expected-note@-3 {{'host_only_function' declared here}}
|
// expected-note@-3 2{{'host_only_function' declared here}}
|
||||||
// expected-note@-3 {{'host_only_function' declared here}}
|
// expected-note@-3 2{{'host_only_function' declared here}}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__host__ __device__ void test_host_device_single_side_overloading() {
|
__host__ __device__ void test_host_device_single_side_overloading() {
|
||||||
|
@ -392,6 +391,37 @@ __host__ __device__ void test_host_device_single_side_overloading() {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// wrong-sided overloading should not cause diagnostic unless it is emitted.
|
||||||
|
// This inline function is not emitted.
|
||||||
|
inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_no_diag() {
|
||||||
|
DeviceReturnTy ret1 = device_only_function(1);
|
||||||
|
DeviceReturnTy2 ret2 = device_only_function(1.0f);
|
||||||
|
HostReturnTy ret3 = host_only_function(1);
|
||||||
|
HostReturnTy2 ret4 = host_only_function(1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// wrong-sided overloading should cause diagnostic if it is emitted.
|
||||||
|
// This inline function is emitted since it is called by an emitted function.
|
||||||
|
inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag() {
|
||||||
|
DeviceReturnTy ret1 = device_only_function(1);
|
||||||
|
DeviceReturnTy2 ret2 = device_only_function(1.0f);
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
|
// expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
|
||||||
|
// expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
|
||||||
|
#endif
|
||||||
|
HostReturnTy ret3 = host_only_function(1);
|
||||||
|
HostReturnTy2 ret4 = host_only_function(1.0f);
|
||||||
|
#ifdef __CUDA_ARCH__
|
||||||
|
// expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
|
||||||
|
// expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ __device__ void test_host_device_wrong_side_overloading_inline_diag_caller() {
|
||||||
|
test_host_device_wrong_side_overloading_inline_diag();
|
||||||
|
// expected-note@-1 {{called by 'test_host_device_wrong_side_overloading_inline_diag_caller'}}
|
||||||
|
}
|
||||||
|
|
||||||
// Verify that we allow overloading function templates.
|
// Verify that we allow overloading function templates.
|
||||||
template <typename T> __host__ T template_overload(const T &a) { return a; };
|
template <typename T> __host__ T template_overload(const T &a) { return a; };
|
||||||
template <typename T> __device__ T template_overload(const T &a) { return a; };
|
template <typename T> __device__ T template_overload(const T &a) { return a; };
|
||||||
|
|
Loading…
Reference in New Issue