diff options
Diffstat (limited to 'clang/lib/Sema/SemaCUDA.cpp')
-rw-r--r-- | clang/lib/Sema/SemaCUDA.cpp | 42 |
1 files changed, 41 insertions, 1 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index d993499..318174f 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -678,6 +678,27 @@ void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
   }
 }
 
+void Sema::CUDARecordImplicitHostDeviceFuncUsedByDevice(
+    const FunctionDecl *Callee) {
+  FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+  if (!Caller)
+    return;
+
+  if (!isCUDAImplicitHostDeviceFunction(Callee))
+    return;
+
+  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
+
+  // Record whether an implicit host device function is used on device side.
+  if (CallerTarget != CFT_Device && CallerTarget != CFT_Global &&
+      (CallerTarget != CFT_HostDevice ||
+       (isCUDAImplicitHostDeviceFunction(Caller) &&
+        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
+    return;
+
+  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
+}
+
 // With -fcuda-host-device-constexpr, an unattributed constexpr function is
 // treated as implicitly __host__ __device__, unless:
 //  * it is a variadic function (device-side variadic functions are not
@@ -702,6 +723,18 @@ void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
     return;
   }
 
+  // If a template function has no host/device/global attributes,
+  // make it implicitly host device function.
+  if (getLangOpts().OffloadImplicitHostDeviceTemplates &&
+      !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() &&
+      !NewD->hasAttr<CUDAGlobalAttr>() &&
+      (NewD->getDescribedFunctionTemplate() ||
+       NewD->isFunctionTemplateSpecialization())) {
+    NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
+    NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+    return;
+  }
+
   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
@@ -950,7 +983,14 @@ void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
     // HD/global functions "exist" in some sense on both the host and device, so
     // should have the same implementation on both sides.
     if (NewTarget != OldTarget &&
-        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
+        ((NewTarget == CFT_HostDevice &&
+          !(LangOpts.OffloadImplicitHostDeviceTemplates &&
+            isCUDAImplicitHostDeviceFunction(NewFD) &&
+            OldTarget == CFT_Device)) ||
+         (OldTarget == CFT_HostDevice &&
+          !(LangOpts.OffloadImplicitHostDeviceTemplates &&
+            isCUDAImplicitHostDeviceFunction(OldFD) &&
+            NewTarget == CFT_Device)) ||
          (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
         !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                     /* ConsiderCudaAttrs = */ false)) {