diff options
author | Guray Ozen <guray.ozen@gmail.com> | 2023-12-05 19:56:43 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-05 19:56:43 +0100 |
commit | 391a7577e703516dbefd41b3da8f3bbd751c6978 (patch) | |
tree | 69a1710ae041cce270c7ca245f81f1e602998c10 | |
parent | 9c2e5449f4c7b8dea918cf9640a4cc26c6972866 (diff) | |
download | llvm-391a7577e703516dbefd41b3da8f3bbd751c6978.zip llvm-391a7577e703516dbefd41b3da8f3bbd751c6978.tar.gz llvm-391a7577e703516dbefd41b3da8f3bbd751c6978.tar.bz2 |
[mlir][gpu] Add lowering dynamic_shared_memory op for rocdl (#74473)
This PR adds lowering of the `gpu.dynamic_shared_memory` op to the ROCDL target.
-rw-r--r-- | mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 2 | ||||
-rw-r--r-- | mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir | 17 |
2 files changed, 19 insertions, 0 deletions
```diff
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index d9f94e3..599bb13 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -363,6 +363,8 @@ void mlir::populateGpuToROCDLConversionPatterns(
     // Use address space = 4 to match the OpenCL definition of printf()
     patterns.add<GPUPrintfOpToLLVMCallLowering>(converter, /*addressSpace=*/4);
   }
+  // TODO: Add alignment for workgroup memory
+  patterns.add<GPUDynamicSharedMemoryOpLowering>(converter);
 
   patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL>(converter);
 
diff --git a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
index 14f5302..f8d82f53 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
@@ -46,3 +46,20 @@ gpu.module @kernel {
 // CHECK: [[value:%.+]] = llvm.load
 // CHECK-SAME: : !llvm.ptr<1> -> f32
 // CHECK: llvm.return [[value]]
+
+// -----
+
+gpu.module @kernel {
+  gpu.func @dynamic_shmem(%arg0: f32) {
+    %0 = arith.constant 0 : index
+    %1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
+    %2 = memref.view %1[%0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<4xf32, #gpu.address_space<workgroup>>
+    memref.store %arg0, %2[%0] : memref<4xf32, #gpu.address_space<workgroup>>
+    gpu.return
+  }
+}
+
+// CHECK-LABEL: llvm.func @dynamic_shmem
+// CHECK: llvm.store
+// CHECK-SAME: : f32, !llvm.ptr<3>
+
```