aboutsummaryrefslogtreecommitdiff
path: root/mlir
diff options
context:
space:
mode:
author Guray Ozen <guray.ozen@gmail.com> 2023-12-06 10:41:57 +0100
committer GitHub <noreply@github.com> 2023-12-06 10:41:57 +0100
commit 641e05decc64c541aa5fd5ce253020178ffe094a (patch)
tree 899ba473fefd68b56afff3beae00df9074203ba2 /mlir
parent ea4ce16da2c7105c72ec8a3f265d5949febe6718 (diff)
download llvm-641e05decc64c541aa5fd5ce253020178ffe094a.zip
llvm-641e05decc64c541aa5fd5ce253020178ffe094a.tar.gz
llvm-641e05decc64c541aa5fd5ce253020178ffe094a.tar.bz2
[mlir][gpu] Support dynamic_shared_memory Op with vector dialect (#74475)
`gpu.dynamic_shared_memory` is currently not lowered when it is used together with the vector dialect, because the vector-to-LLVM conversion patterns are not included in the gpu-to-nvvm pass. This PR adds those patterns to the pass and adds a test.
Diffstat (limited to 'mlir')
-rw-r--r-- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 2
-rw-r--r-- mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir | 20
2 files changed, 22 insertions, 0 deletions
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 4855fd1..0e978ca 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -21,6 +21,7 @@
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -282,6 +283,7 @@ struct LowerGpuOpsToNVVMOpsPass
populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
populateGpuToNVVMConversionPatterns(converter, llvmPatterns);
populateGpuWMMAToNVVMConversionPatterns(converter, llvmPatterns);
+ populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
if (this->hasRedux)
populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns);
LLVMConversionTarget target(getContext());
diff --git a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
index f8d82f53..771f318 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir
@@ -50,6 +50,26 @@ gpu.module @kernel {
// -----
gpu.module @kernel {
+ gpu.func @dynamic_shmem_with_vector(%arg1: memref<1xf32>) {
+ %0 = arith.constant 0 : index
+ %1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
+ %2 = memref.view %1[%0][] : memref<?xi8, #gpu.address_space<workgroup>> to memref<1xf32, #gpu.address_space<workgroup>>
+ %3 = vector.load %2[%0] : memref<1xf32, #gpu.address_space<workgroup>>, vector<1xf32>
+ vector.store %3, %arg1[%0] : memref<1xf32>, vector<1xf32>
+ gpu.return
+ }
+}
+
+// ROCDL: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32} : !llvm.array<0 x i8>
+// NVVM: llvm.mlir.global internal @__dynamic_shmem__0() {addr_space = 3 : i32, alignment = 16 : i64} : !llvm.array<0 x i8>
+// CHECK-LABEL: llvm.func @dynamic_shmem_with_vector
+// CHECK: llvm.mlir.addressof @__dynamic_shmem__0 : !llvm.ptr<3>
+// CHECK: llvm.load %{{.*}} {alignment = 4 : i64} : !llvm.ptr<3> -> vector<1xf32>
+// CHECK: llvm.store
+
+// -----
+
+gpu.module @kernel {
gpu.func @dynamic_shmem(%arg0: f32) {
%0 = arith.constant 0 : index
%1 = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>