[CUDA] Fix false-positive in known-emitted handling.

Previously: When compiling for host, our constructed call graph went *through* kernel calls. This meant that if we had host calls kernel calls HD we would incorrectly mark the HD function as known-emitted on the host side, and thus perform host-side checks on it. Fixing this exposed another issue, wherein when marking a function as known-emitted, we also need to traverse the callgraph of its template, because non-dependent calls are attached to a function's template, not its instantiation. llvm-svn: 284355
2016-10-17 02:25:55 +00:00 · 2016-10-17 02:25:55 +00:00 · d692dfb65e
parent 715ad7fef5
commit d692dfb65e
2 changed files with 66 additions and 6 deletions
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@ -644,10 +644,16 @@ static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
    S.CUDAKnownEmittedFns.insert(Caller);
    EmitDeferredDiags(S, Caller);

-    // Deferred diags are often emitted on the template itself, so emit those as
-    // well.
-    if (auto *Templ = Caller->getPrimaryTemplate())
-      EmitDeferredDiags(S, Templ->getAsFunction());
+    // If this is a template instantiation, explore its callgraph as well:
+    // Non-dependent calls are part of the template's callgraph, while dependent
+    // calls are part of to the instantiation's call graph.
+    if (auto *Templ = Caller->getPrimaryTemplate()) {
+      FunctionDecl *TemplFD = Templ->getAsFunction();
+      if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
+        Seen.insert(TemplFD);
+        Worklist.push_back(TemplFD);
+      }
+    }

    // Add all functions called by Caller to our worklist.
    auto CGIt = S.CUDACallGraph.find(Caller);
@ -676,11 +682,21 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  if (!Caller)
    return true;

+  // If the caller is known-emitted, mark the callee as known-emitted.
+  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
  if (CallerKnownEmitted)
    MarkKnownEmitted(*this, Callee);
-  else
-    CUDACallGraph[Caller].insert(Callee);
+  else {
+    // If we have
+    //   host fn calls kernel fn calls host+device,
+    // the HD function does not get instantiated on the host.  We model this by
+    // omitting at the call to the kernel from the callgraph.  This ensures
+    // that, when compiling for host, only HD functions actually called from the
+    // host get marked as known-emitted.
+    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+      CUDACallGraph[Caller].insert(Callee);
+  }

  CUDADiagBuilder::Kind DiagKind = [&] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
--- a/clang/test/SemaCUDA/trace-through-global.cu
+++ b/clang/test/SemaCUDA/trace-through-global.cu
@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+// Check that it's OK for kernels to call HD functions that call device-only
+// functions.
+
+#include "Inputs/cuda.h"
+
+__device__ void device_fn(int) {}
+// expected-note@-1 {{declared here}}
+// expected-note@-2 {{declared here}}
+
+inline __host__ __device__ int hd1() {
+  device_fn(0);  // expected-error {{reference to __device__ function}}
+  return 0;
+}
+
+inline __host__ __device__ int hd2() {
+  // No error here because hd2 is only referenced from a kernel.
+  device_fn(0);
+  return 0;
+}
+
+inline __host__ __device__ void hd3(int) {
+  device_fn(0);  // expected-error {{reference to __device__ function 'device_fn'}}
+}
+inline __host__ __device__ void hd3(double) {}
+
+inline __host__ __device__ void hd4(int) {}
+inline __host__ __device__ void hd4(double) {
+  device_fn(0);  // No error; this function is never called.
+}
+
+__global__ void kernel(int) { hd2(); }
+
+template <typename T>
+void launch_kernel() {
+  kernel<<<0, 0>>>(T());
+  hd1();
+  hd3(T());
+}
+
+void host_fn() {
+  launch_kernel<int>();
+}