recommit c77a4078e0

2020-04-24 16:41:24 -04:00 · 2020-04-24 16:41:24 -04:00 · b46b1a916d
parent a8e5dcb072
commit b46b1a916d
2 changed files with 122 additions and 56 deletions
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@ -9374,16 +9374,22 @@ static Comparison compareEnableIfAttrs(const Sema &S, const FunctionDecl *Cand1,
  return Comparison::Equal;
 }
-static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
+static Comparison
-                                          const OverloadCandidate &Cand2) {
+isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
                              const OverloadCandidate &Cand2) {
  if (!Cand1.Function || !Cand1.Function->isMultiVersion() || !Cand2.Function ||
      !Cand2.Function->isMultiVersion())
-    return false;
+    return Comparison::Equal;
-  // If Cand1 is invalid, it cannot be a better match, if Cand2 is invalid, this
+  // If both are invalid, they are equal. If one of them is invalid, the other
-  // is obviously better.
+  // is better.
-  if (Cand1.Function->isInvalidDecl()) return false;
+  if (Cand1.Function->isInvalidDecl()) {
-  if (Cand2.Function->isInvalidDecl()) return true;
+    if (Cand2.Function->isInvalidDecl())
      return Comparison::Equal;
    return Comparison::Worse;
  }
  if (Cand2.Function->isInvalidDecl())
    return Comparison::Better;
  // If this is a cpu_dispatch/cpu_specific multiversion situation, prefer
  // cpu_dispatch, else arbitrarily based on the identifiers.
@ -9393,16 +9399,18 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
  const auto *Cand2CPUSpec = Cand2.Function->getAttr<CPUSpecificAttr>();
  if (!Cand1CPUDisp && !Cand2CPUDisp && !Cand1CPUSpec && !Cand2CPUSpec)
-    return false;
+    return Comparison::Equal;
  if (Cand1CPUDisp && !Cand2CPUDisp)
-    return true;
+    return Comparison::Better;
  if (Cand2CPUDisp && !Cand1CPUDisp)
-    return false;
+    return Comparison::Worse;
  if (Cand1CPUSpec && Cand2CPUSpec) {
    if (Cand1CPUSpec->cpus_size() != Cand2CPUSpec->cpus_size())
-      return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size();
+      return Cand1CPUSpec->cpus_size() < Cand2CPUSpec->cpus_size()
                 ? Comparison::Better
                 : Comparison::Worse;
    std::pair<CPUSpecificAttr::cpus_iterator, CPUSpecificAttr::cpus_iterator>
        FirstDiff = std::mismatch(
@ -9415,7 +9423,9 @@ static bool isBetterMultiversionCandidate(const OverloadCandidate &Cand1,
    assert(FirstDiff.first != Cand1CPUSpec->cpus_end() &&
           "Two different cpu-specific versions should not have the same "
           "identifier list, otherwise they'd be the same decl!");
-    return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName();
+    return (*FirstDiff.first)->getName() < (*FirstDiff.second)->getName()
               ? Comparison::Better
               : Comparison::Worse;
  }
  llvm_unreachable("No way to get here unless both had cpu_dispatch");
 }
@ -9475,6 +9485,50 @@ bool clang::isBetterOverloadCandidate(
  else if (!Cand1.Viable)
    return false;
  // [CUDA] A function with 'never' preference is marked not viable, therefore
  // is never shown up here. The worst preference shown up here is 'wrong side',
  // e.g. a host function called by a device host function in device
  // compilation. This is valid AST as long as the host device function is not
  // emitted, e.g. it is an inline function which is called only by a host
  // function. A deferred diagnostic will be triggered if it is emitted.
  // However a wrong-sided function is still a viable candidate here.
  //
  // If Cand1 can be emitted and Cand2 cannot be emitted in the current
  // context, Cand1 is better than Cand2. If Cand1 can not be emitted and Cand2
  // can be emitted, Cand1 is not better than Cand2. This rule should have
  // precedence over other rules.
  //
  // If both Cand1 and Cand2 can be emitted, or neither can be emitted, then
  // other rules should be used to determine which is better. This is because
  // host/device based overloading resolution is mostly for determining
  // viability of a function. If two functions are both viable, other factors
  // should take precedence in preference, e.g. the standard-defined preferences
  // like argument conversion ranks or enable_if partial-ordering. The
  // preference for pass-object-size parameters is probably most similar to a
  // type-based-overloading decision and so should take priority.
  //
  // If other rules cannot determine which is better, CUDA preference will be
  // used again to determine which is better.
  //
  // TODO: Currently IdentifyCUDAPreference does not return correct values
  // for functions called in global variable initializers due to missing
  // correct context about device/host. Therefore we can only enforce this
  // rule when there is a caller. We should enforce this rule for functions
  // in global variable initializers once proper context is added.
  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
    if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
      auto P1 = S.IdentifyCUDAPreference(Caller, Cand1.Function);
      auto P2 = S.IdentifyCUDAPreference(Caller, Cand2.Function);
      assert(P1 != Sema::CFP_Never && P2 != Sema::CFP_Never);
      auto Cand1Emittable = P1 > Sema::CFP_WrongSide;
      auto Cand2Emittable = P2 > Sema::CFP_WrongSide;
      if (Cand1Emittable && !Cand2Emittable)
        return true;
      if (!Cand1Emittable && Cand2Emittable)
        return false;
    }
  }
  // C++ [over.match.best]p1:
  //
  //   -- if F is a static member function, ICS1(F) is defined such
@ -9709,12 +9763,6 @@ bool clang::isBetterOverloadCandidate(
      return Cmp == Comparison::Better;
  }
  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
    FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
    return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
           S.IdentifyCUDAPreference(Caller, Cand2.Function);
  }
  bool HasPS1 = Cand1.Function != nullptr &&
                functionHasPassObjectSizeParams(Cand1.Function);
  bool HasPS2 = Cand2.Function != nullptr &&
@ -9722,7 +9770,22 @@ bool clang::isBetterOverloadCandidate(
  if (HasPS1 != HasPS2 && HasPS1)
    return true;
-  return isBetterMultiversionCandidate(Cand1, Cand2);
+  auto MV = isBetterMultiversionCandidate(Cand1, Cand2);
  if (MV == Comparison::Better)
    return true;
  if (MV == Comparison::Worse)
    return false;
  // If other rules cannot determine which is better, CUDA preference is used
  // to determine which is better.
  if (S.getLangOpts().CUDA && Cand1.Function && Cand2.Function) {
    if (FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext)) {
      return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
             S.IdentifyCUDAPreference(Caller, Cand2.Function);
    }
  }
  return false;
 }
 /// Determine whether two declarations are "equivalent" for the purposes of
@ -9808,33 +9871,6 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
  std::transform(begin(), end(), std::back_inserter(Candidates),
                 [](OverloadCandidate &Cand) { return &Cand; });
  // [CUDA] HD->H or HD->D calls are technically not allowed by CUDA but
  // are accepted by both clang and NVCC. However, during a particular
  // compilation mode only one call variant is viable. We need to
  // exclude non-viable overload candidates from consideration based
  // only on their host/device attributes. Specifically, if one
  // candidate call is WrongSide and the other is SameSide, we ignore
  // the WrongSide candidate.
  if (S.getLangOpts().CUDA) {
    const FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
    bool ContainsSameSideCandidate =
        llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
          // Check viable function only.
          return Cand->Viable && Cand->Function &&
                 S.IdentifyCUDAPreference(Caller, Cand->Function) ==
                     Sema::CFP_SameSide;
        });
    if (ContainsSameSideCandidate) {
      auto IsWrongSideCandidate = [&](OverloadCandidate *Cand) {
        // Check viable function only to avoid unnecessary data copying/moving.
        return Cand->Viable && Cand->Function &&
               S.IdentifyCUDAPreference(Caller, Cand->Function) ==
                   Sema::CFP_WrongSide;
      };
      llvm::erase_if(Candidates, IsWrongSideCandidate);
    }
  }
  // Find the best viable function.
  Best = end();
  for (auto *Cand : Candidates) {
--- a/clang/test/SemaCUDA/function-overload.cu
+++ b/clang/test/SemaCUDA/function-overload.cu
@ -331,9 +331,6 @@ __device__ void test_device_calls_template_fn() {
 // If we have a mix of HD and H-only or D-only candidates in the overload set,
 // normal C++ overload resolution rules apply first.
 template <typename T> TemplateReturnTy template_vs_hd_function(T arg)
 #ifdef __CUDA_ARCH__
 //expected-note@-2 {{declared here}}
 #endif
 {
  return TemplateReturnTy();
 }
@ -342,11 +339,13 @@ __host__ __device__ HostDeviceReturnTy template_vs_hd_function(float arg) {
 }
 __host__ __device__ void test_host_device_calls_hd_template() {
  HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
  TemplateReturnTy ret2 = template_vs_hd_function(1);
 #ifdef __CUDA_ARCH__
-  // expected-error@-2 {{reference to __host__ function 'template_vs_hd_function<int>' in __host__ __device__ function}}
+  typedef HostDeviceReturnTy ExpectedReturnTy;
 #else
  typedef TemplateReturnTy ExpectedReturnTy;
 #endif
  HostDeviceReturnTy ret1 = template_vs_hd_function(1.0f);
  ExpectedReturnTy ret2 = template_vs_hd_function(1);
 }
 __host__ void test_host_calls_hd_template() {
@ -367,14 +366,14 @@ __device__ void test_device_calls_hd_template() {
 __device__ DeviceReturnTy device_only_function(int arg) { return DeviceReturnTy(); }
 __device__ DeviceReturnTy2 device_only_function(float arg) { return DeviceReturnTy2(); }
 #ifndef __CUDA_ARCH__
-  // expected-note@-3 {{'device_only_function' declared here}}
+  // expected-note@-3 2{{'device_only_function' declared here}}
-  // expected-note@-3 {{'device_only_function' declared here}}
+  // expected-note@-3 2{{'device_only_function' declared here}}
 #endif
 __host__ HostReturnTy host_only_function(int arg) { return HostReturnTy(); }
 __host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }
 #ifdef __CUDA_ARCH__
-  // expected-note@-3 {{'host_only_function' declared here}}
+  // expected-note@-3 2{{'host_only_function' declared here}}
-  // expected-note@-3 {{'host_only_function' declared here}}
+  // expected-note@-3 2{{'host_only_function' declared here}}
 #endif
 __host__ __device__ void test_host_device_single_side_overloading() {
@ -392,6 +391,37 @@ __host__ __device__ void test_host_device_single_side_overloading() {
 #endif
 }
 // wrong-sided overloading should not cause diagnostic unless it is emitted.
 // This inline function is not emitted.
 inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_no_diag() {
  DeviceReturnTy ret1 = device_only_function(1);
  DeviceReturnTy2 ret2 = device_only_function(1.0f);
  HostReturnTy ret3 = host_only_function(1);
  HostReturnTy2 ret4 = host_only_function(1.0f);
 }
 // wrong-sided overloading should cause diagnostic if it is emitted.
 // This inline function is emitted since it is called by an emitted function.
 inline __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag() {
  DeviceReturnTy ret1 = device_only_function(1);
  DeviceReturnTy2 ret2 = device_only_function(1.0f);
 #ifndef __CUDA_ARCH__
  // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
  // expected-error@-3 {{reference to __device__ function 'device_only_function' in __host__ __device__ function}}
 #endif
  HostReturnTy ret3 = host_only_function(1);
  HostReturnTy2 ret4 = host_only_function(1.0f);
 #ifdef __CUDA_ARCH__
  // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
  // expected-error@-3 {{reference to __host__ function 'host_only_function' in __host__ __device__ function}}
 #endif
 }
 __host__ __device__ void test_host_device_wrong_side_overloading_inline_diag_caller() {
  test_host_device_wrong_side_overloading_inline_diag();
  // expected-note@-1 {{called by 'test_host_device_wrong_side_overloading_inline_diag_caller'}}
 }
 // Verify that we allow overloading function templates.
 template <typename T> __host__ T template_overload(const T &a) { return a; };
 template <typename T> __device__ T template_overload(const T &a) { return a; };