aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Sema/SemaCUDA.cpp
diff options
context:
space:
mode:
authorYaxun (Sam) Liu <yaxun.liu@amd.com>2023-11-09 20:36:38 -0500
committerGitHub <noreply@github.com>2023-11-09 20:36:38 -0500
commit9774d0ce5fbd70288514da77072313b4f45b34bb (patch)
treeea2d1630ba47e22a7a5cec6b7c7a3194fb325947 /clang/lib/Sema/SemaCUDA.cpp
parentdd57bd0efe90eeb862473e4a354a67e0c925653e (diff)
downloadllvm-9774d0ce5fbd70288514da77072313b4f45b34bb.zip
llvm-9774d0ce5fbd70288514da77072313b4f45b34bb.tar.gz
llvm-9774d0ce5fbd70288514da77072313b4f45b34bb.tar.bz2
[CUDA][HIP] Make template implicitly host device (#70369)
Added option -foffload-implicit-host-device-templates which is off by default. When the option is on, template functions and specializations without host/device attributes have implicit host device attributes. They can be overridden by device template functions with the same signature. They are emitted on device side only if they are used on device side. This feature is added as an extension. `__has_extension(cuda_implicit_host_device_templates)` can be used to check whether it is enabled. This is to facilitate using standard C++ headers for device. Fixes: https://github.com/llvm/llvm-project/issues/69956 Fixes: SWDEV-428314
Diffstat (limited to 'clang/lib/Sema/SemaCUDA.cpp')
-rw-r--r--clang/lib/Sema/SemaCUDA.cpp42
1 files changed, 41 insertions, 1 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index d993499..318174f 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -678,6 +678,27 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
}
}
+void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice(
+ const FunctionDecl *Callee) { // Adds Callee to the used-by-device set when the checks below pass.
+ FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+ if (!Caller) // No current function declaration, so there is no caller to classify.
+ return;
+
+ if (!isCUDAImplicitHostDeviceFunction(Callee)) // Only implicit host device callees are recorded.
+ return;
+
+ CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
+
+ // Record whether an implicit host device function is used on device side.
+ if (CallerTarget != CFT_Device && CallerTarget != CFT_Global && // Device/global callers always record.
+ (CallerTarget != CFT_HostDevice || // Any other non-host-device caller never records.
+ (isCUDAImplicitHostDeviceFunction(Caller) && // An implicit host device caller records only if
+ !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller)))) // it is already in the set itself.
+ return;
+
+ getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee); // Mark Callee as used by device.
+}
+
// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
// * it is a variadic function (device-side variadic functions are not
@@ -702,6 +723,18 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
return;
}
+ // If a template function has no host/device/global attributes,
+ // make it implicitly host device function.
+ if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
+ !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
+ !NewD->hasAttr<CUDAGlobalAttr>() &&
+ (NewD->getDescribedFunctionTemplate() ||
+ NewD->isFunctionTemplateSpecialization())) {
+ NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
+ NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+ return;
+ }
+
if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
@@ -950,7 +983,14 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
// HD/global functions "exist" in some sense on both the host and device, so
// should have the same implementation on both sides.
if (NewTarget != OldTarget &&
- ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
+ ((NewTarget == CFT_HostDevice &&
+ !(LangOpts.OffloadImplicitHostDeviceTemplates &&
+ isCUDAImplicitHostDeviceFunction(NewFD) &&
+ OldTarget == CFT_Device)) ||
+ (OldTarget == CFT_HostDevice &&
+ !(LangOpts.OffloadImplicitHostDeviceTemplates &&
+ isCUDAImplicitHostDeviceFunction(OldFD) &&
+ NewTarget == CFT_Device)) ||
(NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
!IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
/* ConsiderCudaAttrs = */ false)) {