about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Valentin Clement (バレンタイン クレメン) <clementval@gmail.com> 2025-04-02 14:26:09 -0700
committer: GitHub <noreply@github.com> 2025-04-02 14:26:09 -0700
commit: db21ae7803333032e466ead0c2a29c6760739936 (patch)
tree: 4f08bd7717f2525f8c0ff0913b89dfb7429e06d0
parent: 066787b9bdc4ec5ae7e365b651f37840fd5bb2b5 (diff)
download: llvm-db21ae7803333032e466ead0c2a29c6760739936.zip
llvm-db21ae7803333032e466ead0c2a29c6760739936.tar.gz
llvm-db21ae7803333032e466ead0c2a29c6760739936.tar.bz2
[flang][cuda] Support any_sync and ballot_sync (#134135)
-rw-r--r-- flang/include/flang/Optimizer/Builder/IntrinsicCall.h 2
-rw-r--r-- flang/lib/Optimizer/Builder/IntrinsicCall.cpp 39
-rw-r--r-- flang/module/cudadevice.f90 14
-rw-r--r-- flang/test/Lower/CUDA/cuda-device-proc.cuf 6
4 files changed, 53 insertions, 8 deletions
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index a31bbd0..4cbef14 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -442,6 +442,8 @@ struct IntrinsicLibrary {
fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genVerify(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genVoteAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genVoteAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genVoteBallotSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
/// Implement all conversion functions like DBLE, the first argument is
/// the value to convert. There may be an additional KIND arguments that
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 9029ea6..8aed288 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -273,6 +273,10 @@ static constexpr IntrinsicHandler handlers[]{
&I::genAny,
{{{"mask", asAddr}, {"dim", asValue}}},
/*isElemental=*/false},
+ {"any_sync",
+ &I::genVoteAnySync,
+ {{{"mask", asValue}, {"pred", asValue}}},
+ /*isElemental=*/false},
{"asind", &I::genAsind},
{"associated",
&I::genAssociated,
@@ -335,6 +339,10 @@ static constexpr IntrinsicHandler handlers[]{
{"atomicsubi", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicxori", &I::genAtomicXor, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"ballot_sync",
+ &I::genVoteBallotSync,
+ {{{"mask", asValue}, {"pred", asValue}}},
+ /*isElemental=*/false},
{"bessel_jn",
&I::genBesselJn,
{{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}},
@@ -6499,12 +6507,9 @@ IntrinsicLibrary::genMatchAllSync(mlir::Type resultType,
return value;
}
-// ALL_SYNC
-mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType,
- llvm::ArrayRef<mlir::Value> args) {
- assert(args.size() == 2);
-
- llvm::StringRef funcName = "llvm.nvvm.vote.all.sync";
+static mlir::Value genVoteSync(fir::FirOpBuilder &builder, mlir::Location loc,
+ llvm::StringRef funcName,
+ llvm::ArrayRef<mlir::Value> args) {
mlir::MLIRContext *context = builder.getContext();
mlir::Type i32Ty = builder.getI32Type();
mlir::FunctionType ftype =
@@ -6514,6 +6519,28 @@ mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType,
return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
}
+// ALL_SYNC
+mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+ return genVoteSync(builder, loc, "llvm.nvvm.vote.all.sync", args);
+}
+
+// ANY_SYNC
+mlir::Value IntrinsicLibrary::genVoteAnySync(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+ return genVoteSync(builder, loc, "llvm.nvvm.vote.any.sync", args);
+}
+
+// BALLOT_SYNC
+mlir::Value
+IntrinsicLibrary::genVoteBallotSync(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+ return genVoteSync(builder, loc, "llvm.nvvm.vote.ballot.sync", args);
+}
+
// MATCH_ANY_SYNC
mlir::Value
IntrinsicLibrary::genMatchAnySync(mlir::Type resultType,
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 6b8aa4d..591e25e 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -1022,6 +1022,20 @@ implicit none
end function
end interface
+ interface any_sync
+ attributes(device) integer function any_sync(mask, pred)
+ !dir$ ignore_tkr(d) mask, (td) pred
+ integer, value :: mask, pred
+ end function
+ end interface
+
+ interface ballot_sync
+ attributes(device) integer function ballot_sync(mask, pred)
+ !dir$ ignore_tkr(d) mask, (td) pred
+ integer, value :: mask, pred
+ end function
+ end interface
+
! LDCG
interface __ldcg
attributes(device) pure integer(4) function __ldcg_i4(x) bind(c)
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 9758107..6a7fee7 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -299,12 +299,14 @@ end
attributes(device) subroutine testVote()
integer :: a, ipred, mask, v32
a = all_sync(mask, v32)
-
+ a = any_sync(mask, v32)
+ a = ballot_sync(mask, v32)
end subroutine
! CHECK-LABEL: func.func @_QPtestvote()
! CHECK: fir.call @llvm.nvvm.vote.all.sync
-
+! CHECK: fir.call @llvm.nvvm.vote.any.sync
+! CHECK: fir.call @llvm.nvvm.vote.ballot.sync
! CHECK-DAG: func.func private @__ldca_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>)
! CHECK-DAG: func.func private @__ldcg_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>)