Diffstat (limited to 'mlir/test/python/dialects/gpu/dialect.py')
-rw-r--r--  mlir/test/python/dialects/gpu/dialect.py | 96 ++++++++++++++++++++++++++++++-
1 file changed, 95 insertions(+), 1 deletion(-)
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index 66c4018..3945c99 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -2,7 +2,8 @@
 
 from mlir.ir import *
 import mlir.ir as ir
-import mlir.dialects.gpu as gpu
+from mlir.dialects import gpu, func, arith, math
+from mlir.extras import types as T
 import mlir.dialects.gpu.passes
 from mlir.passmanager import *
 
@@ -157,3 +158,96 @@ def testGPUFuncOp():
     # CHECK: %[[VAL_0:.*]] = gpu.global_id x
     # CHECK: gpu.return
     # CHECK: }
+
+
+# CHECK-LABEL: testGPULaunchFuncOp
+@run
+def testGPULaunchFuncOp():
+    module = Module.create()
+
+    module.operation.attributes["gpu.container_module"] = UnitAttr.get()
+    with InsertionPoint(module.body):
+        gpu_module = gpu.GPUModuleOp("gpu_module")
+        block = gpu_module.bodyRegion.blocks.append()
+
+    with InsertionPoint(block):
+        gpu_func = gpu.GPUFuncOp(
+            FunctionType.get([], []),
+            "kernel",
+            body_builder=lambda func: gpu.return_([]),
+            kernel=True,
+        )
+
+    with InsertionPoint(module.body):
+        host = func.FuncOp(type=FunctionType.get([], []), name="host")
+
+    with InsertionPoint(host.add_entry_block()):
+        c1 = arith.constant(T.index(), 1)
+        grid_sizes = (1, 1, 1)
+        block_sizes = (1, 1, 1)
+        token = gpu.wait()
+        token = gpu.launch_func(
+            async_dependencies=[token],
+            kernel=[gpu_module.sym_name.value, gpu_func.name.value],
+            grid_size=grid_sizes,
+            block_size=block_sizes,
+            kernel_operands=[],
+        )
+        gpu.wait(async_dependencies=[token])
+        func.ReturnOp([])
+
+    print(module)
+
+    # CHECK-LABEL: gpu.module @gpu_module {
+    # CHECK: gpu.func @kernel() kernel {
+    # CHECK: gpu.return
+    # CHECK: }
+    # CHECK: }
+
+    # CHECK-LABEL: func.func @host() {
+    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
+    # CHECK: %[[WAIT_0:.*]] = gpu.wait async
+    # CHECK: %[[CONSTANT_1:.*]] = arith.constant 1 : index
+    # CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
+    # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : index
+    # CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index
+    # CHECK: %[[CONSTANT_5:.*]] = arith.constant 1 : index
+    # CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
+    # CHECK: %[[LAUNCH_FUNC_0:.*]] = gpu.launch_func async {{\[}}%[[WAIT_0]]] @gpu_module::@kernel blocks in (%[[CONSTANT_1]], %[[CONSTANT_2]], %[[CONSTANT_3]]) threads in (%[[CONSTANT_4]], %[[CONSTANT_5]], %[[CONSTANT_6]])
+    # CHECK: %[[WAIT_1:.*]] = gpu.wait async {{\[}}%[[LAUNCH_FUNC_0]]]
+    # CHECK: return
+    # CHECK: }
+
+
+# CHECK-LABEL: testGPULaunchOp
+@run
+def testGPULaunchOp():
+    module = Module.create()
+
+    with InsertionPoint(module.body):
+        host = func.FuncOp(type=FunctionType.get([T.f32()], []), name="gpu_printf")
+
+    entry_block = host.add_entry_block()
+    with InsertionPoint(entry_block):
+        c1 = arith.constant(T.index(), 1)
+        grid_sizes = (c1, c1, c1)
+        block_sizes = (c1, c1, c1)
+
+        launch = gpu.launch(grid_sizes, block_sizes)
+
+        op = launch(lambda *args: gpu.printf("%f", args[0]))
+
+    with InsertionPoint(entry_block):
+        func.ReturnOp([])
+
+    print(module)
+
+    # CHECK-LABEL: func.func @gpu_printf(
+    # CHECK-SAME: %[[ARG0:.*]]: f32) {
+    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
+    # CHECK: gpu.launch blocks(%[[VAL_0:.*]], %[[VAL_1:.*]], %[[VAL_2:.*]]) in (%[[VAL_3:.*]] = %[[CONSTANT_0]], %[[VAL_4:.*]] = %[[CONSTANT_0]], %[[VAL_5:.*]] = %[[CONSTANT_0]]) threads(%[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]]) in (%[[VAL_9:.*]] = %[[CONSTANT_0]], %[[VAL_10:.*]] = %[[CONSTANT_0]], %[[VAL_11:.*]] = %[[CONSTANT_0]]) {
+    # CHECK: gpu.printf "%[[VAL_12:.*]]", %[[VAL_0]] : index
+    # CHECK: gpu.terminator
+    # CHECK: }
+    # CHECK: return
+    # CHECK: }
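For context, below is a minimal standalone sketch of the builder pattern the new tests exercise: gpu.launch(grid, block) returns a callable that, given a body-builder lambda, populates the launch region (and, per the CHECK lines above, a gpu.terminator is inserted automatically). The Context/Location harness and the function name "demo" are illustrative additions, standing in for the test file's @run decorator; every dialect call mirrors one that appears verbatim in the diff. It assumes an MLIR build whose Python bindings are importable.

    # Hypothetical standalone driver; not part of the patch above.
    from mlir.ir import Context, Location, Module, InsertionPoint, FunctionType
    from mlir.dialects import gpu, func, arith
    from mlir.extras import types as T

    with Context(), Location.unknown():
        module = Module.create()
        with InsertionPoint(module.body):
            # "demo" is an illustrative name, not from the patch.
            f = func.FuncOp(type=FunctionType.get([], []), name="demo")
        with InsertionPoint(f.add_entry_block()):
            c1 = arith.constant(T.index(), 1)  # shared `index` constant
            # gpu.launch(grid, block) yields a callable; invoking it with a
            # body builder creates the region. args carry the block/thread ids.
            launch = gpu.launch((c1, c1, c1), (c1, c1, c1))
            launch(lambda *args: gpu.printf("%f", args[0]))
            func.ReturnOp([])
        print(module)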