| author | Valentin Clement (バレンタイン クレメン) <clementval@gmail.com> | 2025-04-17 07:25:48 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-17 07:25:48 -0700 |
| commit | 9ee4fdf4990f7dbdc0cfc30a8f733f676dbdcb51 (patch) | |
| tree | 936b2d92396fbfb6b5780a71786a89d32b4459ba | |
| parent | 728f6de4177a7e4d8030cb37ace525e2af97d247 (diff) | |
| download | llvm-9ee4fdf4990f7dbdc0cfc30a8f733f676dbdcb51.zip llvm-9ee4fdf4990f7dbdc0cfc30a8f733f676dbdcb51.tar.gz llvm-9ee4fdf4990f7dbdc0cfc30a8f733f676dbdcb51.tar.bz2 | |
[flang][cuda] Introduce stream cast op (#136050)
Cast a stream object reference to a GPU async token. This makes it possible
to connect the stream representation of CUDA Fortran with the async
mechanism of the GPU dialect.
Later in the lowering, this op will become a no-op.
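A minimal IR sketch of the intended usage, mirroring the test added by this commit (`flang/test/Fir/CUDA/cuda-stream.mlir`); the kernel symbol `@cuda_device_mod::@_QMmod1Psub1` is taken from that test:

```mlir
// A CUDA Fortran stream handle is carried as a reference to an i64.
%stream = fir.alloca i64
%c1 = arith.constant 1 : index
// cuf.stream_cast adapts the stream reference to a GPU async token...
%token = cuf.stream_cast %stream : !fir.ref<i64>
// ...so a kernel launch can be chained on the stream through the
// GPU dialect's async dependency list.
gpu.launch_func [%token] @cuda_device_mod::@_QMmod1Psub1
    blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) args()
```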
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td | 22 |
| -rw-r--r-- | flang/include/flang/Optimizer/Support/InitFIR.h | 2 |
| -rw-r--r-- | flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 11 |
| -rw-r--r-- | flang/test/Fir/CUDA/cuda-stream.mlir | 21 |
| -rw-r--r-- | flang/tools/fir-opt/fir-opt.cpp | 2 |

5 files changed, 55 insertions(+), 3 deletions(-)
```diff
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index feef548..f55f3e8 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -18,6 +18,7 @@ include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
 include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
 include "flang/Optimizer/Dialect/FIRTypes.td"
 include "flang/Optimizer/Dialect/FIRAttr.td"
+include "mlir/Dialect/GPU/IR/GPUBase.td"
 include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
 include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/IR/BuiltinAttributes.td"
@@ -370,4 +371,25 @@ def cuf_SharedMemoryOp
       CArg<"llvm::ArrayRef<mlir::NamedAttribute>", "{}">:$attributes)>];
 }
 
+def cuf_StreamCastOp : cuf_Op<"stream_cast", [NoMemoryEffect]> {
+  let summary = "Adapt a stream value to a GPU async token";
+
+  let description = [{
+    Cast a stream object reference as a GPU async token. This is useful to be
+    able to connect the stream representation of CUDA Fortran and the async
+    mechanism of the GPU dialect.
+    Later in the lowering this will become a no op.
+  }];
+
+  let arguments = (ins fir_ReferenceType:$stream);
+
+  let results = (outs GPU_AsyncToken:$token);
+
+  let assemblyFormat = [{
+    $stream attr-dict `:` type($stream)
+  }];
+
+  let hasVerifier = 1;
+}
+
 #endif // FORTRAN_DIALECT_CUF_CUF_OPS
diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index 4c57e01..1868fbb 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -40,7 +40,7 @@ namespace fir::support {
       mlir::cf::ControlFlowDialect, mlir::func::FuncDialect,       \
       mlir::vector::VectorDialect, mlir::math::MathDialect,        \
       mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect, \
-      mlir::NVVM::NVVMDialect
+      mlir::NVVM::NVVMDialect, mlir::gpu::GPUDialect
 
 #define FLANG_CODEGEN_DIALECT_LIST FIRCodeGenDialect, mlir::LLVM::LLVMDialect
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 957e4c0..ce197d4 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -319,6 +319,17 @@ void cuf::SharedMemoryOp::build(
   result.addAttributes(attributes);
 }
 
+//===----------------------------------------------------------------------===//
+// StreamCastOp
+//===----------------------------------------------------------------------===//
+
+llvm::LogicalResult cuf::StreamCastOp::verify() {
+  auto refTy = mlir::dyn_cast<fir::ReferenceType>(getStream().getType());
+  if (!refTy.getEleTy().isInteger(64))
+    return emitOpError("stream is expected to be a i64 reference");
+  return mlir::success();
+}
+
 // Tablegen operators
 
 #define GET_OP_CLASSES
diff --git a/flang/test/Fir/CUDA/cuda-stream.mlir b/flang/test/Fir/CUDA/cuda-stream.mlir
new file mode 100644
index 0000000..50f2304
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-stream.mlir
@@ -0,0 +1,21 @@
+// RUN: fir-opt --split-input-file %s | FileCheck %s
+
+module attributes {gpu.container_module} {
+  gpu.module @cuda_device_mod {
+    gpu.func @_QMmod1Psub1() kernel {
+      gpu.return
+    }
+  }
+  func.func @_QMmod1Phost_sub() {
+    %0 = fir.alloca i64
+    %1 = arith.constant 1 : index
+    %asyncTok = cuf.stream_cast %0 : !fir.ref<i64>
+    gpu.launch_func [%asyncTok] @cuda_device_mod::@_QMmod1Psub1 blocks in (%1, %1, %1) threads in (%1, %1, %1) args() {cuf.proc_attr = #cuf.cuda_proc<grid_global>}
+    return
+  }
+}
+
+// CHECK-LABEL: func.func @_QMmod1Phost_sub()
+// CHECK: %[[STREAM:.*]] = fir.alloca i64
+// CHECK: %[[TOKEN:.*]] = cuf.stream_cast %[[STREAM]] : <i64>
+// CHECK: gpu.launch_func [%[[TOKEN]]] @cuda_device_mod::@_QMmod1Psub1
diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp
index ef510ff7..d66fc3f 100644
--- a/flang/tools/fir-opt/fir-opt.cpp
+++ b/flang/tools/fir-opt/fir-opt.cpp
@@ -44,8 +44,6 @@ int main(int argc, char **argv) {
 #endif
   DialectRegistry registry;
   fir::support::registerDialects(registry);
-  registry.insert<mlir::gpu::GPUDialect>();
-  registry.insert<mlir::NVVM::NVVMDialect>();
   fir::support::addFIRExtensions(registry);
   return failed(MlirOptMain(argc, argv, "FIR modular optimizer driver\n",
                             registry));
```
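The verifier added in `CUFOps.cpp` pins the operand to a reference to `i64`. A hedged sketch of what it accepts and rejects (the rejected case is an illustration, not part of the commit):

```mlir
// Accepted: the operand is !fir.ref<i64>, the expected stream representation.
%ok = fir.alloca i64
%tok = cuf.stream_cast %ok : !fir.ref<i64>

// Rejected: any other element type fails verification with
// "stream is expected to be a i64 reference".
// %bad = fir.alloca i32
// %tok2 = cuf.stream_cast %bad : !fir.ref<i32>
```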