diff options
author | Yaxun (Sam) Liu <yaxun.liu@amd.com> | 2023-11-09 20:36:38 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-09 20:36:38 -0500 |
commit | 9774d0ce5fbd70288514da77072313b4f45b34bb (patch) | |
tree | ea2d1630ba47e22a7a5cec6b7c7a3194fb325947 /clang/lib/CodeGen/CodeGenModule.cpp | |
parent | dd57bd0efe90eeb862473e4a354a67e0c925653e (diff) | |
download | llvm-9774d0ce5fbd70288514da77072313b4f45b34bb.zip llvm-9774d0ce5fbd70288514da77072313b4f45b34bb.tar.gz llvm-9774d0ce5fbd70288514da77072313b4f45b34bb.tar.bz2 |
[CUDA][HIP] Make template implicitly host device (#70369)
Added option -foffload-implicit-host-device-templates which is off by
default.
When the option is on, template functions and specializations without
host/device attributes have implicit host device attributes.
They can be overridden by device template functions with the same
signature.
They are emitted on device side only if they are used on device side.
This feature is added as an extension.
`__has_extension(cuda_implicit_host_device_templates)` can be used to
check whether it is enabled.
This is to facilitate using standard C++ headers for device.
Fixes: https://github.com/llvm/llvm-project/issues/69956
Fixes: SWDEV-428314
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.cpp')
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 22 |
1 files changed, 19 insertions, 3 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 7535528..4c7f516 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -28,6 +28,7 @@ #include "CoverageMappingGen.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/ASTLambda.h" #include "clang/AST/CharUnits.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" @@ -3560,6 +3561,14 @@ ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { return ConstantAddress(Aliasee, DeclTy, Alignment); } +template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) { + if (!D) + return false; + if (auto *A = D->getAttr<AttrT>()) + return A->isImplicit(); + return D->isImplicit(); +} + void CodeGenModule::EmitGlobal(GlobalDecl GD) { const auto *Global = cast<ValueDecl>(GD.getDecl()); @@ -3581,16 +3590,23 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { return emitCPUDispatchDefinition(GD); // If this is CUDA, be selective about which declarations we emit. + // Non-constexpr non-lambda implicit host device functions are not emitted + // unless they are used on device side. 
if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { - if (!Global->hasAttr<CUDADeviceAttr>() && + const auto *FD = dyn_cast<FunctionDecl>(Global); + if ((!Global->hasAttr<CUDADeviceAttr>() || + (LangOpts.OffloadImplicitHostDeviceTemplates && FD && + hasImplicitAttr<CUDAHostAttr>(FD) && + hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr() && + !isLambdaCallOperator(FD) && + !getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) && !Global->hasAttr<CUDAGlobalAttr>() && !Global->hasAttr<CUDAConstantAttr>() && !Global->hasAttr<CUDASharedAttr>() && !Global->getType()->isCUDADeviceBuiltinSurfaceType() && !Global->getType()->isCUDADeviceBuiltinTextureType() && - !(LangOpts.HIPStdPar && - isa<FunctionDecl>(Global) && + !(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>())) return; } else { |