# RUN: %PYTHON %s | FileCheck %s
from unittest import result
from mlir.ir import (
    Context,
    FunctionType,
    Location,
    Module,
    InsertionPoint,
    IntegerType,
    IndexType,
    MemRefType,
    F32Type,
    Block,
    ArrayAttr,
    Attribute,
    UnitAttr,
    StringAttr,
    DenseI32ArrayAttr,
    ShapedType,
)
from mlir.dialects import openacc, func, arith, memref
from mlir.extras import types


def run(f):
    """Execute test function ``f`` immediately and hand it back unchanged.

    Used as a decorator: a ``// TEST:`` banner with the function's name is
    printed first, then ``f`` is called inside a fresh ``Context`` with an
    unknown default ``Location``.
    """
    print("\n// TEST:", f.__name__)
    with Context(), Location.unknown():
        f()
    return f


@run
def testParallelMemcpy():
    """Build and print a module holding an OpenACC element-wise copy loop.

    The public function ``memcpy_idiom`` takes two dynamically sized 1-D f32
    memrefs and an i64 upper bound.  It wraps the first argument in
    ``acc.copyin`` and the second in ``acc.create``, copies element by element
    inside ``acc.parallel`` / ``acc.loop``, then emits ``acc.delete`` and
    ``acc.copyout``.  The printed module is matched against the FileCheck
    directives at the end of this function.
    """
    module = Module.create()

    dynamic = ShapedType.get_dynamic_size()
    memref_f32_1d_any = MemRefType.get([dynamic], types.f32())

    with InsertionPoint(module.body):
        function_type = FunctionType.get(
            [memref_f32_1d_any, memref_f32_1d_any, types.i64()], []
        )
        f = func.FuncOp(
            type=function_type,
            name="memcpy_idiom",
        )
        f.attributes["sym_visibility"] = StringAttr.get("public")

        with InsertionPoint(f.add_entry_block()):
            c1024 = arith.ConstantOp(types.i32(), 1024)
            c128 = arith.ConstantOp(types.i32(), 128)

            arg0, arg1, arg2 = f.arguments

            copied = openacc.copyin(
                acc_var=arg0.type,
                var=arg0,
                var_type=types.f32(),
                bounds=[],
                async_operands=[],
                implicit=False,
                structured=True,
            )
            created = openacc.create_(
                acc_var=arg1.type,
                var=arg1,
                var_type=types.f32(),
                bounds=[],
                async_operands=[],
                implicit=False,
                structured=True,
            )

            parallel_op = openacc.ParallelOp(
                asyncOperands=[],
                waitOperands=[],
                numGangs=[c1024],
                numWorkers=[],
                vectorLength=[c128],
                reductionOperands=[],
                privateOperands=[],
                firstprivateOperands=[],
                dataClauseOperands=[],
            )

            # Set required device_type and segment attributes to satisfy verifier
            acc_device_none = ArrayAttr.get(
                [Attribute.parse("#acc.device_type<none>")]
            )
            parallel_op.numGangsDeviceType = acc_device_none
            parallel_op.numGangsSegments = DenseI32ArrayAttr.get([1])
            parallel_op.vectorLengthDeviceType = acc_device_none

            parallel_block = Block.create_at_start(
                parent=parallel_op.region, arg_types=[]
            )

            with InsertionPoint(parallel_block):
                c0 = arith.ConstantOp(types.i64(), 0)
                c1 = arith.ConstantOp(types.i64(), 1)

                loop_op = openacc.LoopOp(
                    results_=[],
                    lowerbound=[c0],
                    upperbound=[arg2],
                    step=[c1],
                    gangOperands=[],
                    workerNumOperands=[],
                    vectorOperands=[],
                    tileOperands=[],
                    cacheOperands=[],
                    privateOperands=[],
                    reductionOperands=[],
                    firstprivateOperands=[],
                )

                # Set loop attributes: gang and independent on device_type<none>
                # (reuses the array attribute built for the parallel op above).
                loop_op.gang = acc_device_none
                loop_op.independent = acc_device_none

                # The loop body receives the i64 induction variable as its
                # single block argument.
                loop_block = Block.create_at_start(
                    parent=loop_op.region, arg_types=[types.i64()]
                )

                with InsertionPoint(loop_block):
                    idx = arith.index_cast(
                        out=IndexType.get(), in_=loop_block.arguments[0]
                    )
                    val = memref.load(memref=copied, indices=[idx])
                    memref.store(value=val, memref=created, indices=[idx])
                    openacc.YieldOp([])

                # Terminator of the acc.parallel region.
                openacc.YieldOp([])

            # Data-exit operations; their return values are not used here.
            openacc.delete(
                acc_var=copied,
                bounds=[],
                async_operands=[],
                implicit=False,
                structured=True,
            )
            openacc.copyout(
                acc_var=created,
                var=arg1,
                var_type=types.f32(),
                bounds=[],
                async_operands=[],
                implicit=False,
                structured=True,
            )
            func.ReturnOp([])

    print(module)

    # CHECK: TEST: testParallelMemcpy
    # CHECK-LABEL: func.func public @memcpy_idiom(
    # CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: i64) {
    # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1024 : i32
    # CHECK: %[[CONSTANT_1:.*]] = arith.constant 128 : i32
    # CHECK: %[[COPYIN_0:.*]] = acc.copyin varPtr(%[[ARG0]] : memref<?xf32>) -> memref<?xf32>
    # CHECK: %[[CREATE_0:.*]] = acc.create varPtr(%[[ARG1]] : memref<?xf32>) -> memref<?xf32>
    # CHECK: acc.parallel num_gangs({%[[CONSTANT_0]] : i32}) vector_length(%[[CONSTANT_1]] : i32) {
    # CHECK: %[[CONSTANT_2:.*]] = arith.constant 0 : i64
    # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i64
    # CHECK: acc.loop gang control(%[[VAL_0:.*]] : i64) = (%[[CONSTANT_2]] : i64) to (%[[ARG2]] : i64) step (%[[CONSTANT_3]] : i64) {
    # CHECK: %[[INDEX_CAST_0:.*]] = arith.index_cast %[[VAL_0]] : i64 to index
    # CHECK: %[[LOAD_0:.*]] = memref.load %[[COPYIN_0]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32>
    # CHECK: memref.store %[[LOAD_0]], %[[CREATE_0]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32>
    # CHECK: acc.yield
    # CHECK: } attributes {independent = [#acc.device_type<none>]}
    # CHECK: acc.yield
    # CHECK: }
    # CHECK: acc.delete accPtr(%[[COPYIN_0]] : memref<?xf32>)
    # CHECK: acc.copyout accPtr(%[[CREATE_0]] : memref<?xf32>) to varPtr(%[[ARG1]] : memref<?xf32>)
    # CHECK: return
    # CHECK: }