forked from OSchip/llvm-project
[CUDA] Fix false-positive in known-emitted handling.
Previously: When compiling for host, our constructed call graph went *through* kernel calls. This meant that if we had host calls kernel calls HD we would incorrectly mark the HD function as known-emitted on the host side, and thus perform host-side checks on it. Fixing this exposed another issue, wherein when marking a function as known-emitted, we also need to traverse the callgraph of its template, because non-dependent calls are attached to a function's template, not its instantiation. llvm-svn: 284355
This commit is contained in:
parent
715ad7fef5
commit
d692dfb65e
|
@ -644,10 +644,16 @@ static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
|
||||||
S.CUDAKnownEmittedFns.insert(Caller);
|
S.CUDAKnownEmittedFns.insert(Caller);
|
||||||
EmitDeferredDiags(S, Caller);
|
EmitDeferredDiags(S, Caller);
|
||||||
|
|
||||||
// Deferred diags are often emitted on the template itself, so emit those as
|
// If this is a template instantiation, explore its callgraph as well:
|
||||||
// well.
|
// Non-dependent calls are part of the template's callgraph, while dependent
|
||||||
if (auto *Templ = Caller->getPrimaryTemplate())
|
// calls are part of to the instantiation's call graph.
|
||||||
EmitDeferredDiags(S, Templ->getAsFunction());
|
if (auto *Templ = Caller->getPrimaryTemplate()) {
|
||||||
|
FunctionDecl *TemplFD = Templ->getAsFunction();
|
||||||
|
if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
|
||||||
|
Seen.insert(TemplFD);
|
||||||
|
Worklist.push_back(TemplFD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Add all functions called by Caller to our worklist.
|
// Add all functions called by Caller to our worklist.
|
||||||
auto CGIt = S.CUDACallGraph.find(Caller);
|
auto CGIt = S.CUDACallGraph.find(Caller);
|
||||||
|
@ -676,11 +682,21 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
|
||||||
if (!Caller)
|
if (!Caller)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
// If the caller is known-emitted, mark the callee as known-emitted.
|
||||||
|
// Otherwise, mark the call in our call graph so we can traverse it later.
|
||||||
bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
|
bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
|
||||||
if (CallerKnownEmitted)
|
if (CallerKnownEmitted)
|
||||||
MarkKnownEmitted(*this, Callee);
|
MarkKnownEmitted(*this, Callee);
|
||||||
else
|
else {
|
||||||
CUDACallGraph[Caller].insert(Callee);
|
// If we have
|
||||||
|
// host fn calls kernel fn calls host+device,
|
||||||
|
// the HD function does not get instantiated on the host. We model this by
|
||||||
|
// omitting at the call to the kernel from the callgraph. This ensures
|
||||||
|
// that, when compiling for host, only HD functions actually called from the
|
||||||
|
// host get marked as known-emitted.
|
||||||
|
if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
|
||||||
|
CUDACallGraph[Caller].insert(Callee);
|
||||||
|
}
|
||||||
|
|
||||||
CUDADiagBuilder::Kind DiagKind = [&] {
|
CUDADiagBuilder::Kind DiagKind = [&] {
|
||||||
switch (IdentifyCUDAPreference(Caller, Callee)) {
|
switch (IdentifyCUDAPreference(Caller, Callee)) {
|
||||||
|
|
|
@ -0,0 +1,44 @@
|
||||||
|
// RUN: %clang_cc1 -fsyntax-only -verify %s
|
||||||
|
|
||||||
|
// Check that it's OK for kernels to call HD functions that call device-only
|
||||||
|
// functions.
|
||||||
|
|
||||||
|
#include "Inputs/cuda.h"
|
||||||
|
|
||||||
|
__device__ void device_fn(int) {}
|
||||||
|
// expected-note@-1 {{declared here}}
|
||||||
|
// expected-note@-2 {{declared here}}
|
||||||
|
|
||||||
|
inline __host__ __device__ int hd1() {
|
||||||
|
device_fn(0); // expected-error {{reference to __device__ function}}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline __host__ __device__ int hd2() {
|
||||||
|
// No error here because hd2 is only referenced from a kernel.
|
||||||
|
device_fn(0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline __host__ __device__ void hd3(int) {
|
||||||
|
device_fn(0); // expected-error {{reference to __device__ function 'device_fn'}}
|
||||||
|
}
|
||||||
|
inline __host__ __device__ void hd3(double) {}
|
||||||
|
|
||||||
|
inline __host__ __device__ void hd4(int) {}
|
||||||
|
inline __host__ __device__ void hd4(double) {
|
||||||
|
device_fn(0); // No error; this function is never called.
|
||||||
|
}
|
||||||
|
|
||||||
|
__global__ void kernel(int) { hd2(); }
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void launch_kernel() {
|
||||||
|
kernel<<<0, 0>>>(T());
|
||||||
|
hd1();
|
||||||
|
hd3(T());
|
||||||
|
}
|
||||||
|
|
||||||
|
void host_fn() {
|
||||||
|
launch_kernel<int>();
|
||||||
|
}
|
Loading…
Reference in New Issue