aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CodeGenModule.cpp
diff options
context:
space:
mode:
authorYaxun (Sam) Liu <yaxun.liu@amd.com>2023-11-09 20:36:38 -0500
committerGitHub <noreply@github.com>2023-11-09 20:36:38 -0500
commit9774d0ce5fbd70288514da77072313b4f45b34bb (patch)
treeea2d1630ba47e22a7a5cec6b7c7a3194fb325947 /clang/lib/CodeGen/CodeGenModule.cpp
parentdd57bd0efe90eeb862473e4a354a67e0c925653e (diff)
downloadllvm-9774d0ce5fbd70288514da77072313b4f45b34bb.zip
llvm-9774d0ce5fbd70288514da77072313b4f45b34bb.tar.gz
llvm-9774d0ce5fbd70288514da77072313b4f45b34bb.tar.bz2
[CUDA][HIP] Make template implicitly host device (#70369)
Added option -foffload-implicit-host-device-templates which is off by default. When the option is on, template functions and specializations without host/device attributes have implicit host device attributes. They can be overridden by device template functions with the same signagure. They are emitted on device side only if they are used on device side. This feature is added as an extension. `__has_extension(cuda_implicit_host_device_templates)` can be used to check whether it is enabled. This is to facilitate using standard C++ headers for device. Fixes: https://github.com/llvm/llvm-project/issues/69956 Fixes: SWDEV-428314
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp22
1 files changed, 19 insertions, 3 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 7535528..4c7f516 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -28,6 +28,7 @@
#include "CoverageMappingGen.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
+#include "clang/AST/ASTLambda.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
@@ -3560,6 +3561,14 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) {
return ConstantAddress(Aliasee, DeclTy, Alignment);
}
+template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) {
+ if (!D)
+ return false;
+ if (auto *A = D->getAttr<AttrT>())
+ return A->isImplicit();
+ return D->isImplicit();
+}
+
void CodeGenModule::EmitGlobal(GlobalDecl GD) {
const auto *Global = cast<ValueDecl>(GD.getDecl());
@@ -3581,16 +3590,23 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
return emitCPUDispatchDefinition(GD);
// If this is CUDA, be selective about which declarations we emit.
+ // Non-constexpr non-lambda implicit host device functions are not emitted
+ // unless they are used on device side.
if (LangOpts.CUDA) {
if (LangOpts.CUDAIsDevice) {
- if (!Global->hasAttr<CUDADeviceAttr>() &&
+ const auto *FD = dyn_cast<FunctionDecl>(Global);
+ if ((!Global->hasAttr<CUDADeviceAttr>() ||
+ (LangOpts.OffloadImplicitHostDeviceTemplates && FD &&
+ hasImplicitAttr<CUDAHostAttr>(FD) &&
+ hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr() &&
+ !isLambdaCallOperator(FD) &&
+ !getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) &&
!Global->hasAttr<CUDAGlobalAttr>() &&
!Global->hasAttr<CUDAConstantAttr>() &&
!Global->hasAttr<CUDASharedAttr>() &&
!Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
!Global->getType()->isCUDADeviceBuiltinTextureType() &&
- !(LangOpts.HIPStdPar &&
- isa<FunctionDecl>(Global) &&
+ !(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) &&
!Global->hasAttr<CUDAHostAttr>()))
return;
} else {