author    Markus Böck <markus.boeck02@gmail.com>  2023-02-12 23:52:16 +0100
committer Markus Böck <markus.boeck02@gmail.com>  2023-02-13 22:24:20 +0100
commit    0aaf2e3bc057aa1d784455f8f4da66bc464733d6 (patch)
tree      98a0278d25f66b224a38359140f1eaa947636c87 /mlir/test/Conversion/GPUCommon
parent    9d0b596aada6fb2166dd4f6f58e359fbac483154 (diff)
[mlir][GPU] add required address space cast when lowering to LLVM
The runtime functions `memset` and `memcpy` are declared with pointers to the default address space (0), while the corresponding ops accept memrefs in any address space. Such cases currently cause no issues in MLIR's LLVM dialect because the `bitcast` verifier is too lenient at the moment, but actual LLVM IR does not allow casting between address spaces using `bitcast`: https://godbolt.org/z/3a1z97rc9

This patch fixes the issue by inserting an address space cast before the `bitcast`, first casting the pointer into the correct address space and only then doing the bitcast.

Differential Revision: https://reviews.llvm.org/D143866
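To illustrate, a minimal sketch of the cast sequence for a pointer in address space 1, using the typed-pointer syntax of the LLVM dialect (SSA names are illustrative, not taken from the patch):

    // Before this patch: accepted by the lenient MLIR verifier, but
    // invalid once translated to LLVM IR (bitcast may not change the
    // address space of a pointer).
    %dst = llvm.bitcast %ptr : !llvm.ptr<f32, 1> to !llvm.ptr<i8>

    // After this patch: cast into the default address space first,
    // then bitcast to the i8 pointer expected by the runtime function.
    %cast = llvm.addrspacecast %ptr : !llvm.ptr<f32, 1> to !llvm.ptr<f32>
    %dst  = llvm.bitcast %cast : !llvm.ptr<f32> to !llvm.ptr<i8>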
Diffstat (limited to 'mlir/test/Conversion/GPUCommon')
-rw-r--r--  mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir  4
-rw-r--r--  mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir  3
2 files changed, 5 insertions, 2 deletions
diff --git a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
index df10b31..89c0268 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memcpy-to-gpu-runtime-calls.mlir
@@ -7,8 +7,10 @@ module attributes {gpu.container_module} {
// CHECK: %[[t0:.*]] = llvm.call @mgpuStreamCreate
%t0 = gpu.wait async
// CHECK: %[[size_bytes:.*]] = llvm.ptrtoint
+ // CHECK-NOT: llvm.addrspacecast
// CHECK: %[[src:.*]] = llvm.bitcast
- // CHECK: %[[dst:.*]] = llvm.bitcast
+ // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
+ // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
// CHECK: llvm.call @mgpuMemcpy(%[[dst]], %[[src]], %[[size_bytes]], %[[t0]])
%t1 = gpu.memcpy async [%t0] %dst, %src : memref<7xf32, 1>, memref<7xf32>
// CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
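The updated CHECK lines correspond to a lowered sequence along these lines (a hedged sketch; the operand types and the exact @mgpuMemcpy signature are assumptions, not part of this patch). The source memref sits in the default address space and therefore needs no cast, which the CHECK-NOT asserts, while the destination in address space 1 does:

    %size_bytes = llvm.ptrtoint %gep : !llvm.ptr<f32> to i64
    // Source is already in address space 0: bitcast alone suffices.
    %src  = llvm.bitcast %srcPtr : !llvm.ptr<f32> to !llvm.ptr<i8>
    // Destination lives in address space 1: addrspacecast, then bitcast.
    %cast = llvm.addrspacecast %dstPtr : !llvm.ptr<f32, 1> to !llvm.ptr<f32>
    %dst  = llvm.bitcast %cast : !llvm.ptr<f32> to !llvm.ptr<i8>
    llvm.call @mgpuMemcpy(%dst, %src, %size_bytes, %t0)
        : (!llvm.ptr<i8>, !llvm.ptr<i8>, i64, !llvm.ptr<i8>) -> ()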
diff --git a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
index ef5b6ef..562c155 100644
--- a/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
+++ b/mlir/test/Conversion/GPUCommon/lower-memset-to-gpu-runtime-calls.mlir
@@ -8,7 +8,8 @@ module attributes {gpu.container_module} {
%t0 = gpu.wait async
// CHECK: %[[size_bytes:.*]] = llvm.mlir.constant
// CHECK: %[[value:.*]] = llvm.bitcast
- // CHECK: %[[dst:.*]] = llvm.bitcast
+ // CHECK: %[[addr_cast:.*]] = llvm.addrspacecast
+ // CHECK: %[[dst:.*]] = llvm.bitcast %[[addr_cast]]
// CHECK: llvm.call @mgpuMemset32(%[[dst]], %[[value]], %[[size_bytes]], %[[t0]])
%t1 = gpu.memset async [%t0] %dst, %value : memref<7xf32, 1>, f32
// CHECK: llvm.call @mgpuStreamSynchronize(%[[t0]])
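Both files are lit tests driven through mlir-opt and FileCheck; assuming the usual RUN line for these conversion tests (it is not shown in this diff), they are exercised with something like:

    // RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s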