[CUDA] Fix false-positive in known-emitted handling.

Previously: When compiling for host, our constructed call graph went
*through* kernel calls.  This meant that if we had

  host calls kernel calls HD

we would incorrectly mark the HD function as known-emitted on the host
side, and thus perform host-side checks on it.

Fixing this exposed another issue, wherein when marking a function as
known-emitted, we also need to traverse the callgraph of its template,
because non-dependent calls are attached to a function's template, not
its instantiation.

llvm-svn: 284355
This commit is contained in:
Justin Lebar 2016-10-17 02:25:55 +00:00
parent 715ad7fef5
commit d692dfb65e
2 changed files with 66 additions and 6 deletions

View File

@ -644,10 +644,16 @@ static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
S.CUDAKnownEmittedFns.insert(Caller);
EmitDeferredDiags(S, Caller);
// Deferred diags are often emitted on the template itself, so emit those as
// well.
if (auto *Templ = Caller->getPrimaryTemplate())
EmitDeferredDiags(S, Templ->getAsFunction());
// If this is a template instantiation, explore its callgraph as well:
// Non-dependent calls are part of the template's callgraph, while dependent
// calls are part of the instantiation's call graph.
if (auto *Templ = Caller->getPrimaryTemplate()) {
FunctionDecl *TemplFD = Templ->getAsFunction();
if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
Seen.insert(TemplFD);
Worklist.push_back(TemplFD);
}
}
// Add all functions called by Caller to our worklist.
auto CGIt = S.CUDACallGraph.find(Caller);
@ -676,11 +682,21 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
if (!Caller)
return true;
// If the caller is known-emitted, mark the callee as known-emitted.
// Otherwise, mark the call in our call graph so we can traverse it later.
bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
if (CallerKnownEmitted)
MarkKnownEmitted(*this, Callee);
else
CUDACallGraph[Caller].insert(Callee);
else {
// If we have
// host fn calls kernel fn calls host+device,
// the HD function does not get instantiated on the host. We model this by
// omitting the call to the kernel from the callgraph. This ensures
// that, when compiling for host, only HD functions actually called from the
// host get marked as known-emitted.
if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
CUDACallGraph[Caller].insert(Callee);
}
CUDADiagBuilder::Kind DiagKind = [&] {
switch (IdentifyCUDAPreference(Caller, Callee)) {

View File

@ -0,0 +1,44 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s
// Check that it's OK for kernels to call HD functions that call device-only
// functions.
#include "Inputs/cuda.h"
// Device-only callee used by every host+device helper in this test. The two
// note directives below use relative line offsets that both resolve to this
// declaration; presumably one accompanies each diagnosed reference (the ones
// in hd1 and hd3(int)). Keep them directly after the declaration.
__device__ void device_fn(int) {}
// expected-note@-1 {{declared here}}
// expected-note@-2 {{declared here}}
// Host+device function that launch_kernel calls directly, through a call that
// does not depend on the template parameter. Because it is reached from host
// code, the device-only reference in its body is diagnosed.
inline __host__ __device__ int hd1() {
device_fn(0); // expected-error {{reference to __device__ function}}
return 0;
}
// Host+device function whose only caller in this file is the __global__
// kernel. This is the false-positive case: reaching it solely through a kernel
// launch must not cause it to be checked as host code.
inline __host__ __device__ int hd2() {
// No error here because hd2 is only referenced from a kernel.
device_fn(0);
return 0;
}
// Overload pair reached through the dependent call hd3(T()) in launch_kernel.
// With T = int, the hd3(int) overload is the one called from host code, so the
// device-only reference in its body is diagnosed.
inline __host__ __device__ void hd3(int) {
device_fn(0); // expected-error {{reference to __device__ function 'device_fn'}}
}
// Sibling overload with an empty body; it contains nothing to diagnose.
inline __host__ __device__ void hd3(double) {}
// Overload pair that nothing in this file calls. The device-only reference in
// hd4(double) must therefore stay undiagnosed.
inline __host__ __device__ void hd4(int) {}
inline __host__ __device__ void hd4(double) {
device_fn(0); // No error; this function is never called.
}
// Kernel entry point. Its call to hd2 is the edge that must not make hd2 count
// as called from the host.
__global__ void kernel(int) { hd2(); }
// Function template with no CUDA target attribute that launches the kernel and
// then calls hd1 and hd3. The hd1() call does not depend on T, whereas the
// kernel argument and the hd3(T()) call do, so both dependent and
// non-dependent calls are exercised.
template <typename T>
void launch_kernel() {
kernel<<<0, 0>>>(T());
hd1();
hd3(T());
}
// Non-template caller that instantiates launch_kernel<int>, which is what
// makes the calls inside launch_kernel reachable in this test.
void host_fn() {
launch_kernel<int>();
}