Diffstat (limited to 'mlir/test/python/dialects/gpu/dialect.py')
 mlir/test/python/dialects/gpu/dialect.py | 96 ++++++++++++++++++++++++++++-
 1 file changed, 95 insertions(+), 1 deletion(-)
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index 66c4018..3945c99 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -2,7 +2,8 @@
 from mlir.ir import *
 import mlir.ir as ir
-import mlir.dialects.gpu as gpu
+from mlir.dialects import gpu, func, arith
+from mlir.extras import types as T
 import mlir.dialects.gpu.passes
 from mlir.passmanager import *
@@ -157,3 +158,96 @@ def testGPUFuncOp():
 # CHECK: %[[VAL_0:.*]] = gpu.global_id x
 # CHECK: gpu.return
 # CHECK: }
+
+
+# CHECK-LABEL: testGPULaunchFuncOp
+@run
+def testGPULaunchFuncOp():
+ module = Module.create()
+
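+    # gpu.launch_func resolves its kernel symbol through a gpu.module nested
+    # in a module carrying the gpu.container_module unit attribute.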
+ module.operation.attributes["gpu.container_module"] = UnitAttr.get()
+ with InsertionPoint(module.body):
+ gpu_module = gpu.GPUModuleOp("gpu_module")
+ block = gpu_module.bodyRegion.blocks.append()
+
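+    # Define an empty kernel in the gpu.module: body_builder emits the
+    # terminating gpu.return, and kernel=True attaches the gpu.kernel
+    # attribute.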
+ with InsertionPoint(block):
+ gpu_func = gpu.GPUFuncOp(
+ FunctionType.get([], []),
+ "kernel",
+            body_builder=lambda fn: gpu.return_([]),
+ kernel=True,
+ )
+
+ with InsertionPoint(module.body):
+ host = func.FuncOp(type=FunctionType.get([], []), name="host")
+
+ with InsertionPoint(host.add_entry_block()):
+ c1 = arith.constant(T.index(), 1)
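+        # Sizes may be plain Python ints; the launch_func builder then
+        # materializes each one as its own arith.constant index op, so c1
+        # here goes unused.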
+ grid_sizes = (1, 1, 1)
+ block_sizes = (1, 1, 1)
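+        # Chain the host-side async ops: gpu.wait with no dependencies
+        # creates a fresh async token, launch_func consumes it and returns
+        # a new one, and the final gpu.wait synchronizes on that token.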
+ token = gpu.wait()
+ token = gpu.launch_func(
+ async_dependencies=[token],
+ kernel=[gpu_module.sym_name.value, gpu_func.name.value],
+ grid_size=grid_sizes,
+ block_size=block_sizes,
+ kernel_operands=[],
+ )
+ gpu.wait(async_dependencies=[token])
+ func.ReturnOp([])
+
+ print(module)
+
+ # CHECK-LABEL: gpu.module @gpu_module {
+ # CHECK: gpu.func @kernel() kernel {
+ # CHECK: gpu.return
+ # CHECK: }
+ # CHECK: }
+
+ # CHECK-LABEL: func.func @host() {
+ # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
+ # CHECK: %[[WAIT_0:.*]] = gpu.wait async
+ # CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : index
+ # CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
+ # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : index
+ # CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index
+ # CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : index
+ # CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
+ # CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]])
+ # CHECK: %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]]
+ # CHECK: return
+ # CHECK: }
+
+
+# CHECK-LABEL: testGPULaunchOp
+@run
+def testGPULaunchOp():
+ module = Module.create()
+
+ with InsertionPoint(module.body):
+ host = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf")
+
+ entry_block = host.add_entry_block()
+ with InsertionPoint(entry_block):
+ c1 = arith.constant(T.index(), 1)
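+        # Here the sizes are SSA index values, so the launch builder reuses
+        # them rather than materializing new constants.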
+ grid_sizes = (c1, c1, c1)
+ block_sizes = (c1, c1, c1)
+
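+        # gpu.launch builds the op and returns a callable; invoking it with
+        # a body builder populates the launch region. The builder receives
+        # the region's block arguments (args[0] is the x block id), and the
+        # implicit gpu.terminator is appended automatically.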
+ launch = gpu.launch(grid_sizes, block_sizes)
+
+ op = launch(lambda *args: gpu.printf("%f", args[0]))
+
+ with InsertionPoint(entry_block):
+ func.ReturnOp([])
+
+ print(module)
+
+ # CHECK-LABEL: func.func @gpu_printf(
+ # CHECK-SAME: %[[ARG0:.*]]: f32) {
+ # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
+ # CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) {
+    # CHECK: gpu.printf "%f", %[[VAL_0]] : index
+ # CHECK: gpu.terminator
+ # CHECK: }
+ # CHECK: return
+ # CHECK: }