aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Sema/SemaCUDA.cpp
diff options
context:
space:
mode:
author: Yaxun (Sam) Liu <yaxun.liu@amd.com>, 2023-08-29 09:49:15 -0400
committer: Yaxun (Sam) Liu <yaxun.liu@amd.com>, 2023-08-29 10:17:24 -0400
commit: de0df639724b10001ea9a74539381ea494296be9 (patch)
tree: 4936f2f9cd2b4deb85f2bef48b255e6b94512543 /clang/lib/Sema/SemaCUDA.cpp
parent: 1c5fd1534cfd95fb4ce6356aa7719c7dbe37bee9 (diff)
download: llvm-de0df639724b10001ea9a74539381ea494296be9.zip
llvm-de0df639724b10001ea9a74539381ea494296be9.tar.gz
llvm-de0df639724b10001ea9a74539381ea494296be9.tar.bz2
[CUDA][HIP] Fix overloading resolution in global variable initializer
Currently, clang does not resolve certain overloaded functions correctly in the initializer of global variables, e.g.

    template<typename T1, typename U> T1 mypow(T1, U);
    __attribute__((device)) double mypow(double, int);
    double t_extent = mypow(1.0, 2);

In the above example, mypow is supposed to resolve to the host version, but clang resolves it to the device version instead and emits an error (https://godbolt.org/z/17xxzaa67). However, if the variable is assigned in a host function, there is no error.

The discrepancy in overloading resolution inside and outside of a function is due to clang not accounting for the host/device target when resolving functions called in the initializer of a global variable.

This patch introduces a global host/device target context for CUDA/HIP for functions called outside of functions. For global variable initialization, it is determined by the host/device attribute of the variable. For other situations, a default value of host_device is sufficient.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D158247

Fixes: SWDEV-416731
Diffstat (limited to 'clang/lib/Sema/SemaCUDA.cpp')
-rw-r--r-- clang/lib/Sema/SemaCUDA.cpp 24
1 files changed, 21 insertions, 3 deletions
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index cfea649..88f5484 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -105,19 +105,37 @@ Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
}
template <typename A>
-static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
return isa<A>(Attribute) &&
!(IgnoreImplicitAttr && Attribute->isImplicit());
});
}
+Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
+ CUDATargetContextKind K,
+ Decl *D)
+ : S(S_) {
+ SavedCtx = S.CurCUDATargetCtx;
+ assert(K == CTCK_InitGlobalVar);
+ auto *VD = dyn_cast_or_null<VarDecl>(D);
+ if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
+ auto Target = CFT_Host;
+ if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
+ !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
+ hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
+ hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
+ Target = CFT_Device;
+ S.CurCUDATargetCtx = {Target, K, VD};
+ }
+}
+
/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
bool IgnoreImplicitHDAttr) {
- // Code that lives outside a function is run on the host.
+ // Code that lives outside a function gets the target from CurCUDATargetCtx.
if (D == nullptr)
- return CFT_Host;
+ return CurCUDATargetCtx.Target;
if (D->hasAttr<CUDAInvalidTargetAttr>())
return CFT_InvalidTarget;