diff options
Diffstat (limited to 'clang/lib/Sema/SemaDeclAttr.cpp')
-rw-r--r-- | clang/lib/Sema/SemaDeclAttr.cpp | 146 |
1 files changed, 146 insertions, 0 deletions
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index e6f8748..9475b8a 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5676,6 +5676,146 @@ static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
                         AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
 }
 
+/// Validates one argument of a CUDAClusterDimsAttr.
+///
+/// \param E   The argument expression; null when the argument was omitted.
+/// \param AL  The attribute, streamed into diagnostics to name it.
+/// \param Idx Argument index reported in diagnostics.
+/// \returns {nullptr, 0} when \p E is null or is rejected by one of the
+/// checks below, {E, 1} when \p E is instantiation-dependent (it is accepted
+/// as-is for now), and otherwise the argument wrapped in a ConstantExpr
+/// together with its integer value.
+static std::pair<Expr *, int>
+makeClusterDimsArgExpr(Sema &S, Expr *E, const CUDAClusterDimsAttr &AL,
+                       const unsigned Idx) {
+  if (!E || S.DiagnoseUnexpandedParameterPack(E))
+    return {};
+
+  // Accept template arguments for now as they depend on something else.
+  // We'll get to check them when they eventually get instantiated.
+  if (E->isInstantiationDependent())
+    return {E, 1};
+
+  std::optional<llvm::APSInt> I = E->getIntegerConstantExpr(S.Context);
+  if (!I) {
+    S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
+        << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
+    return {};
+  }
+  // Make sure we can fit it in 4 bits.
+  if (!I->isIntN(4)) {
+    S.Diag(E->getExprLoc(), diag::err_ice_too_large)
+        << toString(*I, 10, false) << 4 << /*Unsigned=*/1;
+    return {};
+  }
+  // NOTE(review): a negative value wider than 4 bits should already have been
+  // rejected by the isIntN(4) check above, so this warning looks unreachable
+  // in practice -- confirm whether the two checks were meant to be swapped.
+  if (*I < 0) {
+    S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative)
+        << &AL << Idx << E->getSourceRange();
+  }
+
+  return {ConstantExpr::Create(S.getASTContext(), E, APValue(*I)),
+          I->getZExtValue()};
+}
+
+/// Builds a CUDAClusterDimsAttr from up to three dimension expressions.
+///
+/// \p X is required; \p Y and \p Z may be null when omitted. Returns null if
+/// an argument is rejected, if the selected triple is neither NVPTX nor
+/// AMDGPU, or if the product of the dimensions exceeds the target's limit.
+CUDAClusterDimsAttr *Sema::createClusterDimsAttr(const AttributeCommonInfo &CI,
+                                                 Expr *X, Expr *Y, Expr *Z) {
+  CUDAClusterDimsAttr TmpAttr(Context, CI, X, Y, Z);
+
+  auto [NewX, ValX] = makeClusterDimsArgExpr(*this, X, TmpAttr, /*Idx=*/0);
+  auto [NewY, ValY] = makeClusterDimsArgExpr(*this, Y, TmpAttr, /*Idx=*/1);
+  auto [NewZ, ValZ] = makeClusterDimsArgExpr(*this, Z, TmpAttr, /*Idx=*/2);
+
+  if (!NewX || (Y && !NewY) || (Z && !NewZ))
+    return nullptr;
+
+  // An omitted dimension comes back from makeClusterDimsArgExpr with value 0;
+  // count it as 1 so it does not zero the product and defeat the limit check.
+  int FlatDim = ValX * (Y ? ValY : 1) * (Z ? ValZ : 1);
+  // Use the aux target's triple when this is not the device compilation and
+  // an aux target exists.
+  const llvm::Triple &TT =
+      (!Context.getLangOpts().CUDAIsDevice && Context.getAuxTargetInfo())
+          ? Context.getAuxTargetInfo()->getTriple()
+          : Context.getTargetInfo().getTriple();
+  int MaxDim = 1;
+  if (TT.isNVPTX())
+    MaxDim = 8;
+  else if (TT.isAMDGPU())
+    MaxDim = 16;
+  else
+    return nullptr;
+
+  // A maximum of 8 thread blocks in a cluster is supported as a portable
+  // cluster size in CUDA. The number is 16 for AMDGPU.
+  if (FlatDim > MaxDim) {
+    Diag(CI.getLoc(), diag::err_cluster_dims_too_large) << MaxDim << FlatDim;
+    return nullptr;
+  }
+
+  return CUDAClusterDimsAttr::Create(Context, NewX, NewY, NewZ, CI);
+}
+
+/// Attaches a CUDAClusterDimsAttr to \p D if createClusterDimsAttr accepts
+/// the arguments; otherwise leaves \p D unchanged.
+void Sema::addClusterDimsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *X,
+                              Expr *Y, Expr *Z) {
+  if (auto *Attr = createClusterDimsAttr(CI, X, Y, Z))
+    D->addAttr(Attr);
+}
+
+/// Unconditionally attaches a CUDANoClusterAttr to \p D.
+void Sema::addNoClusterAttr(Decl *D, const AttributeCommonInfo &CI) {
+  D->addAttr(CUDANoClusterAttr::Create(Context, CI));
+}
+
+/// Returns true if the cluster attribute \p AL may be used on the current
+/// target. Otherwise emits err_cluster_attr_not_supported and returns false.
+/// Rejected: NVPTX with an offload arch below SM_90, and AMDGPU without the
+/// "clusters" feature enabled. Other triples are accepted here.
+static bool checkClusterAttrSupported(Sema &S, const ParsedAttr &AL) {
+  const TargetInfo &TTI = S.Context.getTargetInfo();
+  OffloadArch Arch = StringToOffloadArch(TTI.getTargetOpts().CPU);
+  if ((TTI.getTriple().isNVPTX() && Arch < clang::OffloadArch::SM_90) ||
+      (TTI.getTriple().isAMDGPU() &&
+       !TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters"))) {
+    S.Diag(AL.getLoc(), diag::err_cluster_attr_not_supported) << AL;
+    return false;
+  }
+  return true;
+}
+
+/// Handles a parsed AT_CUDAClusterDims attribute: checks target support and
+/// that one to three arguments were given, then calls addClusterDimsAttr.
+static void handleClusterDimsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  if (!checkClusterAttrSupported(S, AL))
+    return;
+
+  if (!AL.checkAtLeastNumArgs(S, /*Num=*/1) ||
+      !AL.checkAtMostNumArgs(S, /*Num=*/3))
+    return;
+
+  S.addClusterDimsAttr(D, AL, AL.getArgAsExpr(0),
+                       AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr,
+                       AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
+}
+
+/// Handles a parsed AT_CUDANoCluster attribute: checks target support, then
+/// calls addNoClusterAttr.
+static void handleNoClusterAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+  if (!checkClusterAttrSupported(S, AL))
+    return;
+
+  S.addNoClusterAttr(D, AL);
+}
+
 static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
                                           const ParsedAttr &AL) {
   if (!AL.isArgIdent(0)) {
@@ -7141,6 +7281,12 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_CUDALaunchBounds:
     handleLaunchBoundsAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_CUDAClusterDims:
+    handleClusterDimsAttr(S, D, AL);
+    break;
+  case ParsedAttr::AT_CUDANoCluster:
+    handleNoClusterAttr(S, D, AL);
+    break;
   case ParsedAttr::AT_Restrict:
     handleRestrictAttr(S, D, AL);
     break;