diff options
author | Valentin Clement (バレンタイン クレメン) <clementval@gmail.com> | 2025-04-02 14:26:09 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-02 14:26:09 -0700 |
commit | db21ae7803333032e466ead0c2a29c6760739936 (patch) | |
tree | 4f08bd7717f2525f8c0ff0913b89dfb7429e06d0 | |
parent | 066787b9bdc4ec5ae7e365b651f37840fd5bb2b5 (diff) | |
download | llvm-db21ae7803333032e466ead0c2a29c6760739936.zip llvm-db21ae7803333032e466ead0c2a29c6760739936.tar.gz llvm-db21ae7803333032e466ead0c2a29c6760739936.tar.bz2 |
[flang][cuda] Support any_sync and ballot_sync (#134135)
-rw-r--r-- | flang/include/flang/Optimizer/Builder/IntrinsicCall.h | 2 | ||||
-rw-r--r-- | flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 39 | ||||
-rw-r--r-- | flang/module/cudadevice.f90 | 14 | ||||
-rw-r--r-- | flang/test/Lower/CUDA/cuda-device-proc.cuf | 6 |
4 files changed, 53 insertions, 8 deletions
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index a31bbd0..4cbef14 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -442,6 +442,8 @@ struct IntrinsicLibrary { fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue genVerify(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); mlir::Value genVoteAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>); + mlir::Value genVoteAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>); + mlir::Value genVoteBallotSync(mlir::Type, llvm::ArrayRef<mlir::Value>); /// Implement all conversion functions like DBLE, the first argument is /// the value to convert. There may be an additional KIND arguments that diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 9029ea6..8aed288 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -273,6 +273,10 @@ static constexpr IntrinsicHandler handlers[]{ &I::genAny, {{{"mask", asAddr}, {"dim", asValue}}}, /*isElemental=*/false}, + {"any_sync", + &I::genVoteAnySync, + {{{"mask", asValue}, {"pred", asValue}}}, + /*isElemental=*/false}, {"asind", &I::genAsind}, {"associated", &I::genAssociated, @@ -335,6 +339,10 @@ static constexpr IntrinsicHandler handlers[]{ {"atomicsubi", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false}, {"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false}, {"atomicxori", &I::genAtomicXor, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"ballot_sync", + &I::genVoteBallotSync, + {{{"mask", asValue}, {"pred", asValue}}}, + /*isElemental=*/false}, {"bessel_jn", &I::genBesselJn, {{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}}, @@ -6499,12 +6507,9 @@ IntrinsicLibrary::genMatchAllSync(mlir::Type resultType, return value; } -// ALL_SYNC -mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType, - llvm::ArrayRef<mlir::Value> args) { - assert(args.size() == 2); - - llvm::StringRef funcName = "llvm.nvvm.vote.all.sync"; +static mlir::Value genVoteSync(fir::FirOpBuilder &builder, mlir::Location loc, + llvm::StringRef funcName, + llvm::ArrayRef<mlir::Value> args) { mlir::MLIRContext *context = builder.getContext(); mlir::Type i32Ty = builder.getI32Type(); mlir::FunctionType ftype = @@ -6514,6 +6519,28 @@ mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType, return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0); } +// ALL_SYNC +mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 2); + return genVoteSync(builder, loc, "llvm.nvvm.vote.all.sync", args); +} + +// ANY_SYNC +mlir::Value IntrinsicLibrary::genVoteAnySync(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 2); + return genVoteSync(builder, loc, "llvm.nvvm.vote.any.sync", args); +} + +// BALLOT_SYNC +mlir::Value +IntrinsicLibrary::genVoteBallotSync(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 2); + return genVoteSync(builder, loc, "llvm.nvvm.vote.ballot.sync", args); +} + // MATCH_ANY_SYNC mlir::Value IntrinsicLibrary::genMatchAnySync(mlir::Type resultType, diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index 6b8aa4d..591e25e 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -1022,6 +1022,20 @@ implicit none end function end interface + interface any_sync + attributes(device) integer function any_sync(mask, pred) + !dir$ ignore_tkr(d) mask, (td) pred + integer, value :: mask, pred + end function + end interface + + interface ballot_sync + attributes(device) integer function ballot_sync(mask, pred) + !dir$ ignore_tkr(d) mask, (td) pred + integer, value :: mask, pred + end function + end interface + ! LDCG interface __ldcg attributes(device) pure integer(4) function __ldcg_i4(x) bind(c) diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 9758107..6a7fee7 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -299,12 +299,14 @@ end attributes(device) subroutine testVote() integer :: a, ipred, mask, v32 a = all_sync(mask, v32) - + a = any_sync(mask, v32) + a = ballot_sync(mask, v32) end subroutine ! CHECK-LABEL: func.func @_QPtestvote() ! CHECK: fir.call @llvm.nvvm.vote.all.sync - +! CHECK: fir.call @llvm.nvvm.vote.any.sync +! CHECK: fir.call @llvm.nvvm.vote.ballot.sync ! CHECK-DAG: func.func private @__ldca_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) ! CHECK-DAG: func.func private @__ldcg_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>) |