aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Sema/SemaCUDA.cpp
diff options
context:
space:
mode:
authorJustin Lebar <jlebar@google.com>2016-10-17 02:25:55 +0000
committerJustin Lebar <jlebar@google.com>2016-10-17 02:25:55 +0000
commitd692dfb65e92095d7bdab32543cb967f08fe7745 (patch)
treea37acc3f90913969d750ef096c8773ed3603d5b9 /clang/lib/Sema/SemaCUDA.cpp
parent715ad7fef5d42a2ab001a2528027a3aa741f5b7c (diff)
downloadllvm-d692dfb65e92095d7bdab32543cb967f08fe7745.zip
llvm-d692dfb65e92095d7bdab32543cb967f08fe7745.tar.gz
llvm-d692dfb65e92095d7bdab32543cb967f08fe7745.tar.bz2
[CUDA] Fix false-positive in known-emitted handling.
Previously: When compiling for host, our constructed call graph went *through* kernel calls. This meant that if we had a host function calling a kernel that in turn calls an HD (host+device) function, we would incorrectly mark the HD function as known-emitted on the host side, and thus perform host-side checks on it. Fixing this exposed another issue: when marking a function as known-emitted, we also need to traverse the callgraph of its template, because non-dependent calls are attached to a function's template, not its instantiation. llvm-svn: 284355
Diffstat (limited to 'clang/lib/Sema/SemaCUDA.cpp')
-rw-r--r--clang/lib/Sema/SemaCUDA.cpp28
1 files changed, 22 insertions, 6 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 18751d4..75ec5f2 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -644,10 +644,16 @@ static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
S.CUDAKnownEmittedFns.insert(Caller);
EmitDeferredDiags(S, Caller);
- // Deferred diags are often emitted on the template itself, so emit those as
- // well.
- if (auto *Templ = Caller->getPrimaryTemplate())
- EmitDeferredDiags(S, Templ->getAsFunction());
+ // If this is a template instantiation, explore its callgraph as well:
+ // Non-dependent calls are part of the template's callgraph, while dependent
+ // calls are part of the instantiation's call graph.
+ if (auto *Templ = Caller->getPrimaryTemplate()) {
+ FunctionDecl *TemplFD = Templ->getAsFunction();
+ if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
+ Seen.insert(TemplFD);
+ Worklist.push_back(TemplFD);
+ }
+ }
// Add all functions called by Caller to our worklist.
auto CGIt = S.CUDACallGraph.find(Caller);
@@ -676,11 +682,21 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
if (!Caller)
return true;
+ // If the caller is known-emitted, mark the callee as known-emitted.
+ // Otherwise, mark the call in our call graph so we can traverse it later.
bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
if (CallerKnownEmitted)
MarkKnownEmitted(*this, Callee);
- else
- CUDACallGraph[Caller].insert(Callee);
+ else {
+ // If we have
+ // host fn calls kernel fn calls host+device,
+ // the HD function does not get instantiated on the host. We model this by
+ // omitting the call to the kernel from the callgraph. This ensures
+ // that, when compiling for host, only HD functions actually called from the
+ // host get marked as known-emitted.
+ if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+ CUDACallGraph[Caller].insert(Callee);
+ }
CUDADiagBuilder::Kind DiagKind = [&] {
switch (IdentifyCUDAPreference(Caller, Callee)) {