diff options
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 9512b35..1efc39b 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4483,13 +4483,16 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
   // and must all be equivalent. However, we are not allowed to
   // throw away these explicit instantiations.
   //
-  // We don't currently support CUDA device code spread out across multiple TUs,
+  // CUDA/HIP: For -fno-gpu-rdc case, device code is limited to one TU,
   // so say that CUDA templates are either external (for kernels) or internal.
-  // This lets llvm perform aggressive inter-procedural optimizations.
+  // This lets llvm perform aggressive inter-procedural optimizations. For
+  // -fgpu-rdc case, device function calls across multiple TU's are allowed,
+  // therefore we need to follow the normal linkage paradigm.
   if (Linkage == GVA_StrongODR) {
-    if (Context.getLangOpts().AppleKext)
+    if (getLangOpts().AppleKext)
       return llvm::Function::ExternalLinkage;
-    if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice)
+    if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
+        !getLangOpts().GPURelocatableDeviceCode)
       return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage
                                           : llvm::Function::InternalLinkage;
     return llvm::Function::WeakODRLinkage;