aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Sema/SemaDeclAttr.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Sema/SemaDeclAttr.cpp')
-rw-r--r--clang/lib/Sema/SemaDeclAttr.cpp114
1 files changed, 114 insertions, 0 deletions
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index e6f8748..9475b8a 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5676,6 +5676,114 @@ static void handleLaunchBoundsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
}
+static std::pair<Expr *, int>
+makeClusterDimsArgExpr(Sema &S, Expr *E, const CUDAClusterDimsAttr &AL,
+ const unsigned Idx) {
+ if (!E || S.DiagnoseUnexpandedParameterPack(E))
+ return {};
+
+ // Accept template arguments for now as they depend on something else.
+ // We'll get to check them when they eventually get instantiated.
+ if (E->isInstantiationDependent())
+ return {E, 1};
+
+ std::optional<llvm::APSInt> I = E->getIntegerConstantExpr(S.Context);
+ if (!I) {
+ S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type)
+ << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange();
+ return {};
+ }
+ // Make sure we can fit it in 4 bits.
+ if (!I->isIntN(4)) {
+ S.Diag(E->getExprLoc(), diag::err_ice_too_large)
+ << toString(*I, 10, false) << 4 << /*Unsigned=*/1;
+ return {};
+ }
+ if (*I < 0) {
+ S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative)
+ << &AL << Idx << E->getSourceRange();
+ }
+
+ return {ConstantExpr::Create(S.getASTContext(), E, APValue(*I)),
+ I->getZExtValue()};
+}
+
+CUDAClusterDimsAttr *Sema::createClusterDimsAttr(const AttributeCommonInfo &CI,
+ Expr *X, Expr *Y, Expr *Z) {
+ CUDAClusterDimsAttr TmpAttr(Context, CI, X, Y, Z);
+
+ auto [NewX, ValX] = makeClusterDimsArgExpr(*this, X, TmpAttr, /*Idx=*/0);
+ auto [NewY, ValY] = makeClusterDimsArgExpr(*this, Y, TmpAttr, /*Idx=*/1);
+ auto [NewZ, ValZ] = makeClusterDimsArgExpr(*this, Z, TmpAttr, /*Idx=*/2);
+
+ if (!NewX || (Y && !NewY) || (Z && !NewZ))
+ return nullptr;
+
+ int FlatDim = ValX * ValY * ValZ;
+ const llvm::Triple TT =
+ (!Context.getLangOpts().CUDAIsDevice && Context.getAuxTargetInfo())
+ ? Context.getAuxTargetInfo()->getTriple()
+ : Context.getTargetInfo().getTriple();
+ int MaxDim = 1;
+ if (TT.isNVPTX())
+ MaxDim = 8;
+ else if (TT.isAMDGPU())
+ MaxDim = 16;
+ else
+ return nullptr;
+
+ // A maximum of 8 thread blocks in a cluster is supported as a portable
+ // cluster size in CUDA. The number is 16 for AMDGPU.
+ if (FlatDim > MaxDim) {
+ Diag(CI.getLoc(), diag::err_cluster_dims_too_large) << MaxDim << FlatDim;
+ return nullptr;
+ }
+
+ return CUDAClusterDimsAttr::Create(Context, NewX, NewY, NewZ, CI);
+}
+
+void Sema::addClusterDimsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *X,
+ Expr *Y, Expr *Z) {
+ if (auto *Attr = createClusterDimsAttr(CI, X, Y, Z))
+ D->addAttr(Attr);
+}
+
+void Sema::addNoClusterAttr(Decl *D, const AttributeCommonInfo &CI) {
+ D->addAttr(CUDANoClusterAttr::Create(Context, CI));
+}
+
+static void handleClusterDimsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ const TargetInfo &TTI = S.Context.getTargetInfo();
+ OffloadArch Arch = StringToOffloadArch(TTI.getTargetOpts().CPU);
+ if ((TTI.getTriple().isNVPTX() && Arch < clang::OffloadArch::SM_90) ||
+ (TTI.getTriple().isAMDGPU() &&
+ !TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters"))) {
+ S.Diag(AL.getLoc(), diag::err_cluster_attr_not_supported) << AL;
+ return;
+ }
+
+ if (!AL.checkAtLeastNumArgs(S, /*Num=*/1) ||
+ !AL.checkAtMostNumArgs(S, /*Num=*/3))
+ return;
+
+ S.addClusterDimsAttr(D, AL, AL.getArgAsExpr(0),
+ AL.getNumArgs() > 1 ? AL.getArgAsExpr(1) : nullptr,
+ AL.getNumArgs() > 2 ? AL.getArgAsExpr(2) : nullptr);
+}
+
+static void handleNoClusterAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ const TargetInfo &TTI = S.Context.getTargetInfo();
+ OffloadArch Arch = StringToOffloadArch(TTI.getTargetOpts().CPU);
+ if ((TTI.getTriple().isNVPTX() && Arch < clang::OffloadArch::SM_90) ||
+ (TTI.getTriple().isAMDGPU() &&
+ !TTI.hasFeatureEnabled(TTI.getTargetOpts().FeatureMap, "clusters"))) {
+ S.Diag(AL.getLoc(), diag::err_cluster_attr_not_supported) << AL;
+ return;
+ }
+
+ S.addNoClusterAttr(D, AL);
+}
+
static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
const ParsedAttr &AL) {
if (!AL.isArgIdent(0)) {
@@ -7141,6 +7249,12 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_CUDALaunchBounds:
handleLaunchBoundsAttr(S, D, AL);
break;
+ case ParsedAttr::AT_CUDAClusterDims:
+ handleClusterDimsAttr(S, D, AL);
+ break;
+ case ParsedAttr::AT_CUDANoCluster:
+ handleNoClusterAttr(S, D, AL);
+ break;
case ParsedAttr::AT_Restrict:
handleRestrictAttr(S, D, AL);
break;