diff options
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/CodeGen/Targets/AMDGPU.cpp | 20 | ||||
| -rw-r--r-- | clang/lib/Headers/__clang_hip_runtime_wrapper.h | 2 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaDeclAttr.cpp | 114 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaTemplateInstantiateDecl.cpp | 22 |
4 files changed, 158 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 0fcbf7e..16d5919 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -402,6 +402,26 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes( F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str()); } + + if (auto *Attr = FD->getAttr<CUDAClusterDimsAttr>()) { + auto GetExprVal = [&](const auto &E) { + return E ? E->EvaluateKnownConstInt(M.getContext()).getExtValue() : 1; + }; + unsigned X = GetExprVal(Attr->getX()); + unsigned Y = GetExprVal(Attr->getY()); + unsigned Z = GetExprVal(Attr->getZ()); + llvm::SmallString<32> AttrVal; + llvm::raw_svector_ostream OS(AttrVal); + OS << X << ',' << Y << ',' << Z; + F->addFnAttr("amdgpu-cluster-dims", AttrVal.str()); + } + + // OpenCL doesn't support cluster feature. + const TargetInfo &TTI = M.getContext().getTargetInfo(); + if ((IsOpenCLKernel && + TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters")) || + FD->hasAttr<CUDANoClusterAttr>()) + F->addFnAttr("amdgpu-cluster-dims", "0,0,0"); } void AMDGPUTargetCodeGenInfo::setTargetAttributes( diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h index da1e39a..fb0ece9 100644 --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -25,6 +25,8 @@ #define __constant__ __attribute__((constant)) #define __managed__ __attribute__((managed)) +#define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__))) + #if !defined(__cplusplus) || __cplusplus < 201103L #define nullptr NULL; #endif diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index e6f8748..9475b8a 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5676,6 +5676,114 @@ static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) { AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr); } +static std::pair<Expr *, int> +makeClusterDimsArgExpr(Sema &S, Expr *E, const CUDAClusterDimsAttr &AL, + const unsigned Idx) { + if (!E || S.DiagnoseUnexpandedParameterPack(E)) + return {}; + + // Accept template arguments for now as they depend on something else. + // We'll get to check them when they eventually get instantiated. + if (E->isInstantiationDependent()) + return {E, 1}; + + std::optional<llvm::APSInt> I = E->getIntegerConstantExpr(S.Context); + if (!I) { + S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type) + << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange(); + return {}; + } + // Make sure we can fit it in 4 bits. + if (!I->isIntN(4)) { + S.Diag(E->getExprLoc(), diag::err_ice_too_large) + << toString(*I, 10, false) << 4 << /*Unsigned=*/1; + return {}; + } + if (*I < 0) { + S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative) + << &AL << Idx << E->getSourceRange(); + } + + return {ConstantExpr::Create(S.getASTContext(), E, APValue(*I)), + I->getZExtValue()}; +} + +CUDAClusterDimsAttr *Sema::createClusterDimsAttr(const AttributeCommonInfo &CI, + Expr *X, Expr *Y, Expr *Z) { + CUDAClusterDimsAttr TmpAttr(Context, CI, X, Y, Z); + + auto [NewX, ValX] = makeClusterDimsArgExpr(*this, X, TmpAttr, /*Idx=*/0); + auto [NewY, ValY] = makeClusterDimsArgExpr(*this, Y, TmpAttr, /*Idx=*/1); + auto [NewZ, ValZ] = makeClusterDimsArgExpr(*this, Z, TmpAttr, /*Idx=*/2); + + if (!NewX || (Y && !NewY) || (Z && !NewZ)) + return nullptr; + + int FlatDim = ValX * ValY * ValZ; + const llvm::Triple TT = + (!Context.getLangOpts().CUDAIsDevice && Context.getAuxTargetInfo()) + ? Context.getAuxTargetInfo()->getTriple() + : Context.getTargetInfo().getTriple(); + int MaxDim = 1; + if (TT.isNVPTX()) + MaxDim = 8; + else if (TT.isAMDGPU()) + MaxDim = 16; + else + return nullptr; + + // A maximum of 8 thread blocks in a cluster is supported as a portable + // cluster size in CUDA. The number is 16 for AMDGPU. + if (FlatDim > MaxDim) { + Diag(CI.getLoc(), diag::err_cluster_dims_too_large) << MaxDim << FlatDim; + return nullptr; + } + + return CUDAClusterDimsAttr::Create(Context, NewX, NewY, NewZ, CI); +} + +void Sema::addClusterDimsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *X, + Expr *Y, Expr *Z) { + if (auto *Attr = createClusterDimsAttr(CI, X, Y, Z)) + D->addAttr(Attr); +} + +void Sema::addNoClusterAttr(Decl *D, const AttributeCommonInfo &CI) { + D->addAttr(CUDANoClusterAttr::Create(Context, CI)); +} + +static void handleClusterDimsAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + const TargetInfo &TTI = S.Context.getTargetInfo(); + OffloadArch Arch = StringToOffloadArch(TTI.getTargetOpts().CPU); + if ((TTI.getTriple().isNVPTX() && Arch < clang::OffloadArch::SM_90) || + (TTI.getTriple().isAMDGPU() && + !TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters"))) { + S.Diag(AL.getLoc(), diag::err_cluster_attr_not_supported) << AL; + return; + } + + if (!AL.checkAtLeastNumArgs(S, /*Num=*/1) || + !AL.checkAtMostNumArgs(S, /*Num=*/3)) + return; + + S.addClusterDimsAttr(D, AL, AL.getArgAsExpr(0), + AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr, + AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr); +} + +static void handleNoClusterAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + const TargetInfo &TTI = S.Context.getTargetInfo(); + OffloadArch Arch = StringToOffloadArch(TTI.getTargetOpts().CPU); + if ((TTI.getTriple().isNVPTX() && Arch < clang::OffloadArch::SM_90) || + (TTI.getTriple().isAMDGPU() && + !TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters"))) { + S.Diag(AL.getLoc(), diag::err_cluster_attr_not_supported) << AL; + return; + } + + S.addNoClusterAttr(D, AL); +} + static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (!AL.isArgIdent(0)) { @@ -7141,6 +7249,12 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_CUDALaunchBounds: handleLaunchBoundsAttr(S, D, AL); break; + case ParsedAttr::AT_CUDAClusterDims: + handleClusterDimsAttr(S, D, AL); + break; + case ParsedAttr::AT_CUDANoCluster: + handleNoClusterAttr(S, D, AL); + break; case ParsedAttr::AT_Restrict: handleRestrictAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 4ace03d..4863b45 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -707,6 +707,23 @@ static void instantiateDependentAMDGPUMaxNumWorkGroupsAttr( S.AMDGPU().addAMDGPUMaxNumWorkGroupsAttr(New, Attr, XExpr, YExpr, ZExpr); } +static void instantiateDependentCUDAClusterDimsAttr( + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, + const CUDAClusterDimsAttr &Attr, Decl *New) { + EnterExpressionEvaluationContext Unevaluated( + S, Sema::ExpressionEvaluationContext::ConstantEvaluated); + + auto SubstElt = [&S, &TemplateArgs](Expr *E) { + return E ? S.SubstExpr(E, TemplateArgs).get() : nullptr; + }; + + Expr *XExpr = SubstElt(Attr.getX()); + Expr *YExpr = SubstElt(Attr.getY()); + Expr *ZExpr = SubstElt(Attr.getZ()); + + S.addClusterDimsAttr(New, Attr, XExpr, YExpr, ZExpr); +} + // This doesn't take any template parameters, but we have a custom action that // needs to happen when the kernel itself is instantiated. We need to run the // ItaniumMangler to mark the names required to name this kernel. @@ -929,6 +946,11 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, *this, TemplateArgs, *AMDGPUMaxNumWorkGroups, New); } + if (const auto *CUDAClusterDims = dyn_cast<CUDAClusterDimsAttr>(TmplAttr)) { + instantiateDependentCUDAClusterDimsAttr(*this, TemplateArgs, + *CUDAClusterDims, New); + } + if (const auto *ParamAttr = dyn_cast<HLSLParamModifierAttr>(TmplAttr)) { instantiateDependentHLSLParamModifierAttr(*this, TemplateArgs, ParamAttr, Tmpl, New); |
