forked from OSchip/llvm-project
[CUDA] Fix false-positive in known-emitted handling.
Previously: When compiling for host, our constructed call graph went *through* kernel calls. This meant that if we had host calls kernel calls HD we would incorrectly mark the HD function as known-emitted on the host side, and thus perform host-side checks on it. Fixing this exposed another issue, wherein when marking a function as known-emitted, we also need to traverse the callgraph of its template, because non-dependent calls are attached to a function's template, not its instantiation. llvm-svn: 284355
This commit is contained in:
parent
715ad7fef5
commit
d692dfb65e
|
@ -644,10 +644,16 @@ static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
|
|||
S.CUDAKnownEmittedFns.insert(Caller);
|
||||
EmitDeferredDiags(S, Caller);
|
||||
|
||||
// Deferred diags are often emitted on the template itself, so emit those as
|
||||
// well.
|
||||
if (auto *Templ = Caller->getPrimaryTemplate())
|
||||
EmitDeferredDiags(S, Templ->getAsFunction());
|
||||
// If this is a template instantiation, explore its callgraph as well:
|
||||
// Non-dependent calls are part of the template's callgraph, while dependent
|
||||
// calls are part of to the instantiation's call graph.
|
||||
if (auto *Templ = Caller->getPrimaryTemplate()) {
|
||||
FunctionDecl *TemplFD = Templ->getAsFunction();
|
||||
if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
|
||||
Seen.insert(TemplFD);
|
||||
Worklist.push_back(TemplFD);
|
||||
}
|
||||
}
|
||||
|
||||
// Add all functions called by Caller to our worklist.
|
||||
auto CGIt = S.CUDACallGraph.find(Caller);
|
||||
|
@ -676,11 +682,21 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
|
|||
if (!Caller)
|
||||
return true;
|
||||
|
||||
// If the caller is known-emitted, mark the callee as known-emitted.
|
||||
// Otherwise, mark the call in our call graph so we can traverse it later.
|
||||
bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
|
||||
if (CallerKnownEmitted)
|
||||
MarkKnownEmitted(*this, Callee);
|
||||
else
|
||||
CUDACallGraph[Caller].insert(Callee);
|
||||
else {
|
||||
// If we have
|
||||
// host fn calls kernel fn calls host+device,
|
||||
// the HD function does not get instantiated on the host. We model this by
|
||||
// omitting at the call to the kernel from the callgraph. This ensures
|
||||
// that, when compiling for host, only HD functions actually called from the
|
||||
// host get marked as known-emitted.
|
||||
if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
|
||||
CUDACallGraph[Caller].insert(Callee);
|
||||
}
|
||||
|
||||
CUDADiagBuilder::Kind DiagKind = [&] {
|
||||
switch (IdentifyCUDAPreference(Caller, Callee)) {
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
// RUN: %clang_cc1 -fsyntax-only -verify %s
|
||||
|
||||
// Check that it's OK for kernels to call HD functions that call device-only
|
||||
// functions.
|
||||
|
||||
#include "Inputs/cuda.h"
|
||||
|
||||
__device__ void device_fn(int) {}
|
||||
// expected-note@-1 {{declared here}}
|
||||
// expected-note@-2 {{declared here}}
|
||||
|
||||
inline __host__ __device__ int hd1() {
|
||||
device_fn(0); // expected-error {{reference to __device__ function}}
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline __host__ __device__ int hd2() {
|
||||
// No error here because hd2 is only referenced from a kernel.
|
||||
device_fn(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline __host__ __device__ void hd3(int) {
|
||||
device_fn(0); // expected-error {{reference to __device__ function 'device_fn'}}
|
||||
}
|
||||
inline __host__ __device__ void hd3(double) {}
|
||||
|
||||
inline __host__ __device__ void hd4(int) {}
|
||||
inline __host__ __device__ void hd4(double) {
|
||||
device_fn(0); // No error; this function is never called.
|
||||
}
|
||||
|
||||
__global__ void kernel(int) { hd2(); }
|
||||
|
||||
template <typename T>
|
||||
void launch_kernel() {
|
||||
kernel<<<0, 0>>>(T());
|
||||
hd1();
|
||||
hd3(T());
|
||||
}
|
||||
|
||||
void host_fn() {
|
||||
launch_kernel<int>();
|
||||
}
|
Loading…
Reference in New Issue