Diffstat (limited to 'clang')
-rw-r--r--  clang/include/clang/Sema/ScopeInfo.h               |   3
-rw-r--r--  clang/include/clang/Sema/SemaCUDA.h                |   4
-rw-r--r--  clang/lib/Sema/SemaCUDA.cpp                        |  47
-rw-r--r--  clang/lib/Sema/SemaExpr.cpp                        |  24
-rw-r--r--  clang/lib/Sema/SemaOverload.cpp                    |   2
-rw-r--r--  clang/test/CodeGenCUDA/lambda-constexpr-capture.cu | 135
6 files changed, 212 insertions(+), 3 deletions(-)
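
In short: when an overloaded call inside a lambda is resolved in CUDA/HIP code,
any local constexpr argument for which the host-side and device-side candidates
disagree about by-value versus by-reference passing is recorded in the lambda
scope; such variables are then exempted from the non-odr-use rewrite so they
are captured on both sides, keeping the host and device layouts of the closure
identical.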
diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h
index 958d650..6bf9ae8 100644
--- a/clang/include/clang/Sema/ScopeInfo.h
+++ b/clang/include/clang/Sema/ScopeInfo.h
@@ -949,6 +949,9 @@ public:
SourceLocation PotentialThisCaptureLocation;
+ /// Variables that are potentially ODR-used in CUDA/HIP.
+ llvm::SmallPtrSet<VarDecl *, 4> CUDAPotentialODRUsedVars;
+
LambdaScopeInfo(DiagnosticsEngine &Diag)
: CapturingScopeInfo(Diag, ImpCap_None) {
Kind = SK_Lambda;
diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
index 71f05e8..dbc14328 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -274,6 +274,10 @@ public:
/// parameters specified via <<<>>>.
std::string getConfigureFuncName() const;
+ /// Record variables that are potentially ODR-used in CUDA/HIP.
+ void recordPotentialODRUsedVariable(MultiExprArg Args,
+ OverloadCandidateSet &CandidateSet);
+
private:
unsigned ForceHostDeviceDepth = 0;
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 0e5fc5e..0a8c24f 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -18,6 +18,7 @@
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
+#include "clang/Sema/Overload.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Template.h"
@@ -1100,3 +1101,49 @@ std::string SemaCUDA::getConfigureFuncName() const {
// Legacy CUDA kernel configuration call
return "cudaConfigureCall";
}
+
+// Record local constexpr variables that some candidate would take by value on
+// one target (host or device) and some candidate by reference on the other,
+// since the two compilations would then disagree about whether the variable
+// is odr-used.
+void SemaCUDA::recordPotentialODRUsedVariable(
+ MultiExprArg Arguments, OverloadCandidateSet &Candidates) {
+ sema::LambdaScopeInfo *LambdaInfo = SemaRef.getCurLambda();
+ if (!LambdaInfo)
+ return;
+
+ for (unsigned I = 0; I < Arguments.size(); ++I) {
+ auto *DeclRef = dyn_cast<DeclRefExpr>(Arguments[I]);
+ if (!DeclRef)
+ continue;
+ auto *Variable = dyn_cast<VarDecl>(DeclRef->getDecl());
+ if (!Variable || !Variable->isLocalVarDecl() || !Variable->isConstexpr())
+ continue;
+
+ bool HostByValue = false, HostByRef = false;
+ bool DeviceByValue = false, DeviceByRef = false;
+
+ for (OverloadCandidate &Candidate : Candidates) {
+ FunctionDecl *Callee = Candidate.Function;
+ if (!Callee || I >= Callee->getNumParams())
+ continue;
+
+ CUDAFunctionTarget Target = IdentifyTarget(Callee);
+ if (Target == CUDAFunctionTarget::InvalidTarget ||
+ Target == CUDAFunctionTarget::Global)
+ continue;
+
+ bool CoversHost = (Target == CUDAFunctionTarget::Host ||
+ Target == CUDAFunctionTarget::HostDevice);
+ bool CoversDevice = (Target == CUDAFunctionTarget::Device ||
+ Target == CUDAFunctionTarget::HostDevice);
+
+ bool IsRef = Callee->getParamDecl(I)->getType()->isReferenceType();
+ HostByValue |= CoversHost && !IsRef;
+ HostByRef |= CoversHost && IsRef;
+ DeviceByValue |= CoversDevice && !IsRef;
+ DeviceByRef |= CoversDevice && IsRef;
+ }
+
+ if ((HostByValue && DeviceByRef) || (HostByRef && DeviceByValue))
+ LambdaInfo->CUDAPotentialODRUsedVars.insert(Variable);
+ }
+}
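
To make the mismatch concrete, here is a minimal sketch of the pattern this
function detects (the names scale, sketch, and eps are illustrative, not from
the patch):

// Device overload takes the argument by value: reading a constexpr's value
// is not an odr-use, so the device side alone would not capture 'eps'.
__device__ float scale(float x) { return x; }

// Host overload binds the argument by reference: that odr-uses 'eps', so the
// host side must capture it.
float scale(const float &x) { return x; }

template <typename F> __global__ void kernel(F f) { f(); }

void sketch(float *out) {
  constexpr float eps = 1.0e-6f;
  // Without recordPotentialODRUsedVariable, only the host view of this
  // closure would contain 'eps', and the host and device layouts of the
  // same lambda would disagree.
  auto l = [=] __host__ __device__ () { *out = scale(eps); };
  kernel<<<1, 1>>>(l);
}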
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 2e6ce17..4186999 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19517,11 +19517,29 @@ static ExprResult rebuildPotentialResultsAsNonOdrUsed(Sema &S, Expr *E,
return false;
};
+ // Check whether this expression may be odr-used in CUDA/HIP.
+ auto MaybeCUDAODRUsed = [&]() -> bool {
+ if (!S.LangOpts.CUDA)
+ return false;
+ LambdaScopeInfo *LSI = S.getCurLambda();
+ if (!LSI)
+ return false;
+ auto *DRE = dyn_cast<DeclRefExpr>(E);
+ if (!DRE)
+ return false;
+ auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
+ if (!VD)
+ return false;
+ return LSI->CUDAPotentialODRUsedVars.count(VD);
+ };
+
// Mark that this expression does not constitute an odr-use.
auto MarkNotOdrUsed = [&] {
- S.MaybeODRUseExprs.remove(E);
- if (LambdaScopeInfo *LSI = S.getCurLambda())
- LSI->markVariableExprAsNonODRUsed(E);
+ if (!MaybeCUDAODRUsed()) {
+ S.MaybeODRUseExprs.remove(E);
+ if (LambdaScopeInfo *LSI = S.getCurLambda())
+ LSI->markVariableExprAsNonODRUsed(E);
+ }
};
// C++2a [basic.def.odr]p2:
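
For context on the guarded path: in plain C++, naming a constexpr variable
only to read its value is not an odr-use, so an implicit by-copy capture can
be dropped entirely. A standalone sketch, independent of this patch:

void f() {
  constexpr int n = 4;
  auto g = [=] { return n; };   // value-only use: not odr-used, nothing captured
  auto h = [=] { return &n; };  // address taken: odr-used, 'n' captured by copy
  (void)g;
  (void)h;
}

The CUDA/HIP twist is that whether a given argument position odr-uses the
variable depends on which overload each compilation selects; MaybeCUDAODRUsed
keeps flagged variables out of the non-odr-use rewrite so the capture survives
on both sides.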
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 5b224b6..042de8d 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -14706,6 +14706,8 @@ ExprResult Sema::BuildOverloadedCallExpr(Scope *S, Expr *Fn,
// the UnresolvedLookupExpr was type-dependent.
if (OverloadResult == OR_Success) {
const FunctionDecl *FDecl = Best->Function;
+ if (LangOpts.CUDA)
+ CUDA().recordPotentialODRUsedVariable(Args, CandidateSet);
if (FDecl && FDecl->isTemplateInstantiation() &&
FDecl->getReturnType()->isUndeducedType()) {
diff --git a/clang/test/CodeGenCUDA/lambda-constexpr-capture.cu b/clang/test/CodeGenCUDA/lambda-constexpr-capture.cu
new file mode 100644
index 0000000..1a1db63
--- /dev/null
+++ b/clang/test/CodeGenCUDA/lambda-constexpr-capture.cu
@@ -0,0 +1,135 @@
+// RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple x86_64-linux-gnu \
+// RUN: | FileCheck -check-prefixes=CHECK,HOST %s
+// RUN: %clang_cc1 -emit-llvm -x hip %s -o - -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: | FileCheck -check-prefixes=CHECK,DEV %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: %class.anon = type { ptr, float, ptr, ptr }
+// CHECK: %class.anon.0 = type { ptr, float, ptr, ptr }
+// CHECK: %class.anon.1 = type { ptr, ptr, ptr }
+// CHECK: %class.anon.2 = type { ptr, float, ptr, ptr }
+
+// HOST: call void @_ZN8DevByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon)
+// DEV: define amdgpu_kernel void @_ZN8DevByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon)
+
+// Only the device function passes arguments by value.
+namespace DevByVal {
+__device__ float fun(float x, float y) {
+ return x;
+}
+
+float fun(const float &x, const float &y) {
+ return x;
+}
+
+template<typename F>
+void __global__ kernel(F f)
+{
+ f(1);
+}
+
+void test(float const * fl, float const * A, float * Vf)
+{
+ float constexpr small(1.0e-25);
+
+ auto lambda = [=] __device__ __host__ (unsigned int n) {
+ float const value = fun(small, fl[0]);
+ Vf[0] = value * A[0];
+ };
+ kernel<<<1, 1>>>(lambda);
+}
+}
+
+// HOST: call void @_ZN9HostByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.0)
+// DEV: define amdgpu_kernel void @_ZN9HostByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.0)
+
+// Only the host function passes arguments by value.
+namespace HostByVal {
+float fun(float x, float y) {
+ return x;
+}
+
+__device__ float fun(const float &x, const float &y) {
+ return x;
+}
+
+template<typename F>
+void __global__ kernel(F f)
+{
+ f(1);
+}
+
+void test(float const * fl, float const * A, float * Vf)
+{
+ float constexpr small(1.0e-25);
+
+ auto lambda = [=] __device__ __host__ (unsigned int n) {
+ float const value = fun(small, fl[0]);
+ Vf[0] = value * A[0];
+ };
+ kernel<<<1, 1>>>(lambda);
+}
+}
+
+// HOST: call void @_ZN9BothByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.1)
+// DEV: define amdgpu_kernel void @_ZN9BothByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.1)
+
+// Both the host and device functions pass arguments by value.
+namespace BothByVal {
+float fun(float x, float y) {
+ return x;
+}
+
+__device__ float fun(float x, float y) {
+ return x;
+}
+
+template<typename F>
+void __global__ kernel(F f)
+{
+ f(1);
+}
+
+void test(float const * fl, float const * A, float * Vf)
+{
+ float constexpr small(1.0e-25);
+
+ auto lambda = [=] __device__ __host__ (unsigned int n) {
+ float const value = fun(small, fl[0]);
+ Vf[0] = value * A[0];
+ };
+ kernel<<<1, 1>>>(lambda);
+}
+}
+
+// HOST: call void @_ZN12NeitherByVal21__device_stub__kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr noundef byval(%class.anon.2)
+// DEV: define amdgpu_kernel void @_ZN12NeitherByVal6kernelIZNS_4testEPKfS2_PfEUljE_EEvT_(ptr addrspace(4) noundef byref(%class.anon.2)
+
+// Neither the host nor device function passes arguments by value.
+namespace NeitherByVal {
+float fun(const float& x, const float& y) {
+ return x;
+}
+
+__device__ float fun(const float& x, const float& y) {
+ return x;
+}
+
+template<typename F>
+void __global__ kernel(F f)
+{
+ f(1);
+}
+
+void test(float const * fl, float const * A, float * Vf)
+{
+ float constexpr small(1.0e-25);
+
+ auto lambda = [=] __device__ __host__ (unsigned int n) {
+ float const value = fun(small, fl[0]);
+ Vf[0] = value * A[0];
+ };
+ kernel<<<1, 1>>>(lambda);
+}
+}
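
The four namespaces cover the 2x2 matrix of by-value versus by-reference
overloads. In the mixed cases (DevByVal, HostByVal) the new logic forces the
capture, and in NeitherByVal the reference parameters odr-use 'small' on both
sides anyway, so those three closures carry an extra float slot (%class.anon,
%class.anon.0, %class.anon.2). Only BothByVal, where neither side odr-uses
'small', omits it (%class.anon.1).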