From de0df639724b10001ea9a74539381ea494296be9 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 29 Aug 2023 09:49:15 -0400 Subject: [CUDA][HIP] Fix overloading resolution in global variable initializer Currently, clang does not resolve certain overloaded functions correctly in the initializer of global variables, e.g. template <typename T1, typename U> T1 mypow(T1, U); __attribute__((device)) double mypow(double, int); double t_extent = mypow(1.0, 2); In the above example, mypow is supposed to resolve to the host version but clang resolves it to the device version instead, and emits an error (https://godbolt.org/z/17xxzaa67). However, if the variable is assigned in a host function, there is no error. The discrepancy in overloading resolution inside and outside of a function is due to clang not accounting for the host/device target when resolving functions called in the initializer of a global variable. This patch introduces a global host/device target context for CUDA/HIP for functions called outside of functions. For global variable initialization, it is determined by the host/device attribute of the variable. For other situations, a default value of host_device is sufficient. 
Reviewed by: Artem Belevich Differential Revision: https://reviews.llvm.org/D158247 Fixes: SWDEV-416731 --- clang/lib/Sema/SemaCUDA.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'clang/lib/Sema/SemaCUDA.cpp') diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index cfea649..88f5484 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -105,19 +105,37 @@ Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { } template <typename A> -static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { +static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) { return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { return isa<A>(Attribute) && !(IgnoreImplicitAttr && Attribute->isImplicit()); }); } +Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_, + CUDATargetContextKind K, + Decl *D) + : S(S_) { + SavedCtx = S.CurCUDATargetCtx; + assert(K == CTCK_InitGlobalVar); + auto *VD = dyn_cast_or_null<VarDecl>(D); + if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) { + auto Target = CFT_Host; + if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) && + !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) || + hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) || + hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true)) + Target = CFT_Device; + S.CurCUDATargetCtx = {Target, K, VD}; + } +} + /// IdentifyCUDATarget - Determine the CUDA compilation target for this function Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, bool IgnoreImplicitHDAttr) { - // Code that lives outside a function is run on the host. + // Code that lives outside a function gets the target from CurCUDATargetCtx. if (D == nullptr) - return CFT_Host; + return CurCUDATargetCtx.Target; if (D->hasAttr<CUDAInvalidTargetAttr>()) return CFT_InvalidTarget; -- cgit v1.1