Diffstat (limited to 'mlir/test')
-rw-r--r-- mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir | 67
-rw-r--r-- mlir/test/Analysis/test-alias-analysis.mlir | 16
-rw-r--r-- mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir | 76
-rw-r--r-- mlir/test/Conversion/MathToXeVM/lit.local.cfg | 7
-rw-r--r-- mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir | 199
-rw-r--r-- mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir | 119
-rw-r--r-- mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir | 86
-rw-r--r-- mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir | 12
-rw-r--r-- mlir/test/Conversion/XeGPUToXeVM/dpas.mlir | 8
-rw-r--r-- mlir/test/Conversion/XeGPUToXeVM/loadstore_matrix.mlir | 201
-rw-r--r-- mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir | 29
-rw-r--r-- mlir/test/Dialect/AMDGPU/invalid.mlir | 24
-rw-r--r-- mlir/test/Dialect/AMDGPU/ops.mlir | 55
-rw-r--r-- mlir/test/Dialect/AMX/legalize-for-llvm.mlir | 88
-rw-r--r-- mlir/test/Dialect/AMX/roundtrip.mlir | 28
-rw-r--r-- mlir/test/Dialect/Affine/canonicalize.mlir | 130
-rw-r--r-- mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir | 6
-rw-r--r-- mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir | 32
-rw-r--r-- mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir | 38
-rw-r--r-- mlir/test/Dialect/LLVMIR/bytecode.mlir | 35
-rw-r--r-- mlir/test/Dialect/LLVMIR/canonicalize.mlir | 11
-rw-r--r-- mlir/test/Dialect/LLVMIR/debuginfo.mlir | 1
-rw-r--r-- mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir | 14
-rw-r--r-- mlir/test/Dialect/LLVMIR/rocdl.mlir | 14
-rw-r--r-- mlir/test/Dialect/LLVMIR/roundtrip.mlir | 6
-rw-r--r-- mlir/test/Dialect/Linalg/decompose-pack.mlir | 61
-rw-r--r-- mlir/test/Dialect/Linalg/match-ops-interpreter.mlir | 14
-rw-r--r-- mlir/test/Dialect/Linalg/one-shot-bufferize.mlir | 16
-rw-r--r-- mlir/test/Dialect/Linalg/transform-op-fuse.mlir | 88
-rw-r--r-- mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir | 181
-rw-r--r-- mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir | 133
-rw-r--r-- mlir/test/Dialect/MemRef/canonicalize.mlir | 30
-rw-r--r-- mlir/test/Dialect/OpenACC/pointer-like-interface-alloc.mlir | 4
-rw-r--r-- mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir | 102
-rw-r--r-- mlir/test/Dialect/OpenACC/recipe-populate-private.mlir | 82
-rw-r--r-- mlir/test/Dialect/SCF/one-shot-bufferize.mlir | 12
-rw-r--r-- mlir/test/Dialect/Tensor/one-shot-bufferize.mlir | 41
-rw-r--r-- mlir/test/Dialect/Tensor/tiling.mlir | 2
-rw-r--r-- mlir/test/Dialect/Tosa/tosa-attach-target.mlir | 8
-rw-r--r-- mlir/test/Dialect/Tosa/tosa-validation-version-1p0-invalid.mlir | 21
-rw-r--r-- mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir | 20
-rw-r--r-- mlir/test/Dialect/Vector/canonicalize/vector-step.mlir | 311
-rw-r--r-- mlir/test/Dialect/Vector/vector-unroll-options.mlir | 68
-rw-r--r-- mlir/test/Dialect/Vector/vector-warp-distribute.mlir | 54
-rw-r--r-- mlir/test/Dialect/WasmSSA/custom_parser/global.mlir | 4
-rw-r--r-- mlir/test/Dialect/WasmSSA/custom_parser/if.mlir | 8
-rw-r--r-- mlir/test/Dialect/WasmSSA/custom_parser/import.mlir | 8
-rw-r--r-- mlir/test/Dialect/WasmSSA/custom_parser/local.mlir | 12
-rw-r--r-- mlir/test/Dialect/WasmSSA/custom_parser/memory.mlir | 8
-rw-r--r-- mlir/test/Dialect/WasmSSA/custom_parser/table.mlir | 6
-rw-r--r-- mlir/test/Dialect/WasmSSA/extend-invalid.mlir | 4
-rw-r--r-- mlir/test/Dialect/WasmSSA/global-invalid.mlir | 12
-rw-r--r-- mlir/test/Dialect/WasmSSA/locals-invalid.mlir | 4
-rw-r--r-- mlir/test/Dialect/XeGPU/invalid.mlir | 36
-rw-r--r-- mlir/test/Dialect/XeGPU/ops.mlir | 60
-rw-r--r-- mlir/test/Dialect/XeGPU/xegpu-blocking.mlir | 98
-rw-r--r-- mlir/test/Examples/standalone/test.wheel.toy | 1
-rw-r--r-- mlir/test/Integration/GPU/SPIRV/simple_add.mlir | 11
-rw-r--r-- mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir | 24
-rw-r--r-- mlir/test/Pass/remark-final.mlir | 17
-rw-r--r-- mlir/test/Target/Cpp/expressions.mlir | 232
-rw-r--r-- mlir/test/Target/LLVMIR/Import/debug-info.ll | 3
-rw-r--r-- mlir/test/Target/LLVMIR/Import/function-attributes.ll | 6
-rw-r--r-- mlir/test/Target/LLVMIR/amx.mlir | 13
-rw-r--r-- mlir/test/Target/LLVMIR/llvmir.mlir | 11
-rw-r--r-- mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir | 12
-rw-r--r-- mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 30
-rw-r--r-- mlir/test/Target/LLVMIR/nvvmir.mlir | 4
-rw-r--r-- mlir/test/Target/LLVMIR/rocdl.mlir | 97
-rw-r--r-- mlir/test/Target/Wasm/abs.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/add_div.mlir | 40
-rw-r--r-- mlir/test/Target/Wasm/and.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/block.mlir | 16
-rw-r--r-- mlir/test/Target/Wasm/block_complete_type.mlir | 24
-rw-r--r-- mlir/test/Target/Wasm/block_value_type.mlir | 19
-rw-r--r-- mlir/test/Target/Wasm/branch_if.mlir | 29
-rw-r--r-- mlir/test/Target/Wasm/call.mlir | 17
-rw-r--r-- mlir/test/Target/Wasm/clz.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/comparison_ops.mlir | 269
-rw-r--r-- mlir/test/Target/Wasm/const.mlir | 8
-rw-r--r-- mlir/test/Target/Wasm/convert.mlir | 85
-rw-r--r-- mlir/test/Target/Wasm/copysign.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/ctz.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/demote.mlir | 15
-rw-r--r-- mlir/test/Target/Wasm/div.mlir | 20
-rw-r--r-- mlir/test/Target/Wasm/double_nested_loop.mlir | 63
-rw-r--r-- mlir/test/Target/Wasm/empty_blocks_list_and_stack.mlir | 53
-rw-r--r-- mlir/test/Target/Wasm/eq.mlir | 56
-rw-r--r-- mlir/test/Target/Wasm/eqz.mlir | 21
-rw-r--r-- mlir/test/Target/Wasm/extend.mlir | 69
-rw-r--r-- mlir/test/Target/Wasm/global.mlir | 16
-rw-r--r-- mlir/test/Target/Wasm/if.mlir | 112
-rw-r--r-- mlir/test/Target/Wasm/import.mlir | 12
-rw-r--r-- mlir/test/Target/Wasm/inputs/add_div.yaml.wasm | 50
-rw-r--r-- mlir/test/Target/Wasm/inputs/block.yaml.wasm | 22
-rw-r--r-- mlir/test/Target/Wasm/inputs/block_complete_type.yaml.wasm | 23
-rw-r--r-- mlir/test/Target/Wasm/inputs/block_value_type.yaml.wasm | 18
-rw-r--r-- mlir/test/Target/Wasm/inputs/branch_if.yaml.wasm | 18
-rw-r--r-- mlir/test/Target/Wasm/inputs/call.yaml.wasm | 26
-rw-r--r-- mlir/test/Target/Wasm/inputs/comparison_ops.yaml.wasm | 88
-rw-r--r-- mlir/test/Target/Wasm/inputs/convert.yaml.wasm | 69
-rw-r--r-- mlir/test/Target/Wasm/inputs/demote.yaml.wasm | 18
-rw-r--r-- mlir/test/Target/Wasm/inputs/double_nested_loop.yaml.wasm | 19
-rw-r--r-- mlir/test/Target/Wasm/inputs/empty_blocks_list_and_stack.yaml.wasm | 21
-rw-r--r-- mlir/test/Target/Wasm/inputs/eq.yaml.wasm | 27
-rw-r--r-- mlir/test/Target/Wasm/inputs/eqz.yaml.wasm | 29
-rw-r--r-- mlir/test/Target/Wasm/inputs/extend.yaml.wasm | 40
-rw-r--r-- mlir/test/Target/Wasm/inputs/if.yaml.wasm | 25
-rw-r--r-- mlir/test/Target/Wasm/inputs/loop.yaml.wasm | 17
-rw-r--r-- mlir/test/Target/Wasm/inputs/loop_with_inst.yaml.wasm | 20
-rw-r--r-- mlir/test/Target/Wasm/inputs/ne.yaml.wasm | 27
-rw-r--r-- mlir/test/Target/Wasm/inputs/promote.yaml.wasm | 18
-rw-r--r-- mlir/test/Target/Wasm/inputs/reinterpret.yaml.wasm | 53
-rw-r--r-- mlir/test/Target/Wasm/inputs/rounding.yaml.wasm | 37
-rw-r--r-- mlir/test/Target/Wasm/inputs/wrap.yaml.wasm | 24
-rw-r--r-- mlir/test/Target/Wasm/invalid_block_type_index.yaml | 28
-rw-r--r-- mlir/test/Target/Wasm/local.mlir | 6
-rw-r--r-- mlir/test/Target/Wasm/loop.mlir | 17
-rw-r--r-- mlir/test/Target/Wasm/loop_with_inst.mlir | 33
-rw-r--r-- mlir/test/Target/Wasm/max.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/memory_min_eq_max.mlir | 2
-rw-r--r-- mlir/test/Target/Wasm/memory_min_max.mlir | 2
-rw-r--r-- mlir/test/Target/Wasm/memory_min_no_max.mlir | 2
-rw-r--r-- mlir/test/Target/Wasm/min.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/ne.mlir | 52
-rw-r--r-- mlir/test/Target/Wasm/neg.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/or.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/popcnt.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/promote.mlir | 14
-rw-r--r-- mlir/test/Target/Wasm/reinterpret.mlir | 46
-rw-r--r-- mlir/test/Target/Wasm/rem.mlir | 8
-rw-r--r-- mlir/test/Target/Wasm/rotl.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/rotr.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/rounding.mlir | 50
-rw-r--r-- mlir/test/Target/Wasm/shl.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/shr_s.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/shr_u.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/sqrt.mlir | 4
-rw-r--r-- mlir/test/Target/Wasm/sub.mlir | 8
-rw-r--r-- mlir/test/Target/Wasm/wrap.mlir | 15
-rw-r--r-- mlir/test/Target/Wasm/xor.mlir | 4
-rw-r--r-- mlir/test/lib/Analysis/CMakeLists.txt | 1
-rw-r--r-- mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp | 86
-rw-r--r-- mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp | 26
-rw-r--r-- mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp | 6
-rw-r--r-- mlir/test/lib/Dialect/OpenACC/CMakeLists.txt | 1
-rw-r--r-- mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp | 6
-rw-r--r-- mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp | 8
-rw-r--r-- mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp | 110
-rw-r--r-- mlir/test/lib/Dialect/Test/TestAttrDefs.td | 17
-rw-r--r-- mlir/test/lib/Dialect/Test/TestAttributes.cpp | 18
-rw-r--r-- mlir/test/lib/Dialect/Test/TestAttributes.h | 1
-rw-r--r-- mlir/test/lib/Dialect/Test/TestDialect.h | 1
-rw-r--r-- mlir/test/lib/Dialect/Test/TestDialect.td | 5
-rw-r--r-- mlir/test/lib/Dialect/Test/TestOpDefs.cpp | 44
-rw-r--r-- mlir/test/lib/Dialect/Test/TestOps.td | 15
-rw-r--r-- mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp | 6
-rw-r--r-- mlir/test/lib/Pass/TestRemarksPass.cpp | 7
-rw-r--r-- mlir/test/lit.cfg.py | 3
-rw-r--r-- mlir/test/lit.site.cfg.py.in | 1
-rw-r--r-- mlir/test/mlir-pdll/CodeGen/CPP/general.pdll | 2
-rw-r--r-- mlir/test/mlir-tblgen/cpp-class-comments.td | 10
-rw-r--r-- mlir/test/mlir-tblgen/dialect.td | 5
-rw-r--r-- mlir/test/python/dialects/gpu/dialect.py | 93
-rw-r--r-- mlir/test/python/dialects/openacc.py | 171
-rw-r--r-- mlir/test/python/dialects/transform_structured_ext.py | 60
-rw-r--r-- mlir/test/python/ir/operation.py | 28
-rw-r--r-- mlir/test/python/rewrite.py | 7
168 files changed, 5511 insertions, 482 deletions
diff --git a/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir
new file mode 100644
index 0000000..808c1c2
--- /dev/null
+++ b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir
@@ -0,0 +1,67 @@
+// RUN: mlir-opt -test-strided-metadata-range-analysis %s 2>&1 | FileCheck %s
+
+func.func @memref_subview(%arg0: memref<8x16x4xf32, strided<[64, 4, 1]>>, %arg1: memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>>, %arg2: memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>>, %arg3: index, %arg4: index, %arg5: index) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
+ %0 = test.with_bounds {smax = 13 : index, smin = 11 : index, umax = 13 : index, umin = 11 : index} : index
+ %1 = test.with_bounds {smax = 7 : index, smin = 5 : index, umax = 7 : index, umin = 5 : index} : index
+
+ // Test a subview with unknown sizes, and constant offsets and strides.
+ // CHECK: Op: %[[SV0:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [1, 1] signed : [1, 1]}]
+ // CHECK-SAME: sizes = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: strides = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [4, 4] signed : [4, 4]}, {unsigned : [1, 1] signed : [1, 1]}]
+ %subview = memref.subview %arg0[%c0, %c0, %c1] [%arg3, %arg4, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[64, 4, 1]>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
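+ // (Derivation, for reference: the source offset is 0 and its strides are
+ // [64, 4, 1], so the new offset is 0*64 + 0*4 + 1*1 = 1. The sizes come
+ // from unconstrained function arguments, hence the full unsigned/signed
+ // ranges above; the unit steps leave the strides unchanged.)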
+
+ // Test a subview of a subview, with bounded dynamic offsets.
+ // CHECK: Op: %[[SV1:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [346, 484] signed : [346, 484]}]
+ // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}]
+ // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}]
+ %subview_0 = memref.subview %subview[%1, %1, %1] [%c2, %c2, %c2] [%0, %0, %0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
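+ // (Derivation: each index lies in [5, 7], so the offset is
+ // 1 + i*(64 + 4 + 1) for i in [5, 7], i.e. [346, 484], and the strides
+ // [64, 4, 1] are scaled by the step range [11, 13].)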
+
+ // Test a subview of a subview, with constant operands.
+ // CHECK: Op: %[[SV2:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [368, 510] signed : [368, 510]}]
+ // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}]
+ // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}]
+ %subview_1 = memref.subview %subview_0[%c0, %c0, %c2] [%c2, %c2, %c2] [%c1, %c1, %c1] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
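+ // (Derivation: only the innermost index is nonzero (2), so the offset is
+ // [346 + 2*11, 484 + 2*13] = [368, 510]; sizes and strides are inherited.)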
+
+ // Test a rank-reducing subview.
+ // CHECK: Op: %[[SV3:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: sizes = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [16, 16] signed : [16, 16]}]
+ // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ %subview_2 = memref.subview %arg1[%arg4, %arg4, %arg4, %arg4, %arg4] [1, 64, 1, 16, 1] [%arg5, %arg5, %arg5, %arg5, %arg5] : memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>> to memref<64x16xf32, strided<[?, ?], offset: ?>>
+
+ // Test a subview of a rank-reducing subview.
+ // CHECK: Op: %[[SV4:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: sizes = [{unsigned : [5, 7] signed : [5, 7]}]
+ // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ %subview_3 = memref.subview %subview_2[%c0, %0] [1, %1] [%c1, %c2] : memref<64x16xf32, strided<[?, ?], offset: ?>> to memref<?xf32, strided<[?], offset: ?>>
+
+ // Test a subview with mixed bounded and unbounded dynamic sizes.
+ // CHECK: Op: %[[SV5:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [32, 32] signed : [32, 32]}]
+ // CHECK-SAME: sizes = [{unsigned : [11, 13] signed : [11, 13]}, {unsigned : [5, 7] signed : [5, 7]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: strides = [{unsigned : [1, 1] signed : [1, 1]}, {unsigned : [64, 64] signed : [64, 64]}, {unsigned : [8, 8] signed : [8, 8]}]
+ %subview_4 = memref.subview %arg2[%c0, %c0, %c2] [%0, %1, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
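+ // (Derivation: the source offset is 16 and its strides are [1, 64, 8], so
+ // indexing by [0, 0, 2] gives 16 + 2*8 = 32; the first two sizes carry the
+ // ranges of %0 and %1 while the third remains unbounded.)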
+ return
+}
+
+// CHECK: func.func @memref_subview
+// CHECK: %[[A0:.*]]: memref<8x16x4xf32, strided<[64, 4, 1]>>
+// CHECK: %[[SV0]] = memref.subview %[[A0]]
+// CHECK-NEXT: %[[SV1]] = memref.subview
+// CHECK-NEXT: %[[SV2]] = memref.subview
+// CHECK-NEXT: %[[SV3]] = memref.subview
+// CHECK-NEXT: %[[SV4]] = memref.subview
+// CHECK-NEXT: %[[SV5]] = memref.subview
diff --git a/mlir/test/Analysis/test-alias-analysis.mlir b/mlir/test/Analysis/test-alias-analysis.mlir
index 8cbee61..d71adee 100644
--- a/mlir/test/Analysis/test-alias-analysis.mlir
+++ b/mlir/test/Analysis/test-alias-analysis.mlir
@@ -256,3 +256,19 @@ func.func @constants(%arg: memref<2xf32>) attributes {test.ptr = "func"} {
return
}
+
+// -----
+
+// CHECK-LABEL: Testing : "distinct_objects"
+// CHECK-DAG: func.region0#0 <-> func.region0#1: MayAlias
+
+// CHECK-DAG: distinct#0 <-> distinct#1: NoAlias
+// CHECK-DAG: distinct#0 <-> func.region0#0: MustAlias
+// CHECK-DAG: distinct#1 <-> func.region0#0: MayAlias
+// CHECK-DAG: distinct#0 <-> func.region0#1: MayAlias
+// CHECK-DAG: distinct#1 <-> func.region0#1: MustAlias
+
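+// memref.distinct_objects passes its operands through while asserting that
+// they point to distinct allocations: each result therefore MustAliases its
+// own operand, the two results are pairwise NoAlias, and everything else
+// stays MayAlias.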
+func.func @distinct_objects(%arg: memref<?xf32>, %arg1: memref<?xf32>) attributes {test.ptr = "func"} {
+ %0, %1 = memref.distinct_objects %arg, %arg1 {test.ptr = "distinct"} : memref<?xf32>, memref<?xf32>
+ return
+}
diff --git a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
index dbff233..455f886 100644
--- a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
+++ b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
@@ -1,4 +1,5 @@
-// RUN: mlir-opt %s -convert-math-to-rocdl -allow-unregistered-dialect -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -split-input-file -pass-pipeline='builtin.module(convert-math-to-rocdl{chipset=gfx803})' | FileCheck %s --check-prefixes=CHECK,PRE9
+// RUN: mlir-opt %s -allow-unregistered-dialect -split-input-file -pass-pipeline='builtin.module(convert-math-to-rocdl{chipset=gfx942})' | FileCheck %s --check-prefixes=CHECK,POST9
module @test_module {
// CHECK: llvm.func @__ocml_fmod_f16(f16, f16) -> f16
@@ -596,3 +597,76 @@ module @test_module {
func.return %result : vector<2x2xf16>
}
}
+
+// -----
+
+// f16 clamp → rocdl.fmed3 on gfx9+
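+// (fmed3 returns the median of its three operands; for lo <= hi,
+// median(x, lo, hi) == min(max(x, lo), hi), which is exactly a clamp.)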
+// CHECK-LABEL: func.func @clampf_f16
+func.func @clampf_f16(%x: f16, %lo: f16, %hi: f16) -> f16 {
+ %r = math.clampf %x to [%lo, %hi] : f16
+ return %r : f16
+ // POST9: rocdl.fmed3 {{.*}} : f16
+ // PRE9-NOT: rocdl.fmed3
+ // PRE9: math.clampf {{.*}} : f16
+}
+
+// f32 clamp → rocdl.fmed3 on gfx9+
+// CHECK-LABEL: func.func @clampf_f32
+func.func @clampf_f32(%x: f32, %lo: f32, %hi: f32) -> f32 {
+ %r = math.clampf %x to [%lo, %hi] : f32
+ return %r : f32
+ // POST9: rocdl.fmed3 {{.*}} : f32
+ // PRE9-NOT: rocdl.fmed3
+ // PRE9: math.clampf {{.*}} : f32
+}
+
+// -----
+
+// Vector f16 clamp → rocdl.fmed3 on gfx9+
+// CHECK-LABEL: func.func @clampf_vector_f16
+func.func @clampf_vector_f16(%x: vector<2xf16>, %lo: vector<2xf16>, %hi: vector<2xf16>) -> vector<2xf16> {
+ %r = math.clampf %x to [%lo, %hi] : vector<2xf16>
+ return %r : vector<2xf16>
+ // POST9: rocdl.fmed3 {{.*}} : vector<2xf16>
+ // PRE9-NOT: rocdl.fmed3
+ // PRE9: math.clampf {{.*}} : vector<2xf16>
+}
+
+// -----
+
+// Vector f32 clamp → rocdl.fmed3 on gfx9+
+// CHECK-LABEL: func.func @clampf_vector_f32
+func.func @clampf_vector_f32(%x: vector<2xf32>, %lo: vector<2xf32>, %hi: vector<2xf32>) -> vector<2xf32> {
+ %r = math.clampf %x to [%lo, %hi] : vector<2xf32>
+ return %r : vector<2xf32>
+ // POST9: rocdl.fmed3 {{.*}} : vector<2xf32>
+ // PRE9-NOT: rocdl.fmed3
+ // PRE9: math.clampf {{.*}} : vector<2xf32>
+}
+
+// -----
+
+// Multi-dimensional vector f16 clamp → rocdl.fmed3 on gfx9+ (unrolled to 1D vectors)
+// CHECK-LABEL: func.func @clampf_vector_2d_f16
+func.func @clampf_vector_2d_f16(%x: vector<2x2xf16>, %lo: vector<2x2xf16>, %hi: vector<2x2xf16>) -> vector<2x2xf16> {
+ %r = math.clampf %x to [%lo, %hi] : vector<2x2xf16>
+ return %r : vector<2x2xf16>
+ // POST9: builtin.unrealized_conversion_cast {{.*}} : vector<2x2xf16> to !llvm.array<2 x vector<2xf16>>
+ // POST9: llvm.extractvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
+ // POST9: rocdl.fmed3 {{.*}} : vector<2xf16>
+ // POST9: llvm.insertvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
+ // POST9: llvm.extractvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
+ // POST9: rocdl.fmed3 {{.*}} : vector<2xf16>
+ // POST9: llvm.insertvalue {{.*}} : !llvm.array<2 x vector<2xf16>>
+ // PRE9-NOT: rocdl.fmed3
+ // PRE9: math.clampf {{.*}} : vector<2x2xf16>
+}
+
+// -----
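+
+// bf16 clamp is left as math.clampf on every chipset; no fmed3 lowering is
+// attempted for bf16 (presumably because fmed3 lacks a bf16 form).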
+// CHECK-LABEL: func.func @clampf_bf16
+func.func @clampf_bf16(%x: bf16, %lo: bf16, %hi: bf16) -> bf16 {
+ %r = math.clampf %x to [%lo, %hi] : bf16
+ return %r : bf16
+ // CHECK: math.clampf {{.*}} : bf16
+ // CHECK-NOT: rocdl.fmed3
+}
diff --git a/mlir/test/Conversion/MathToXeVM/lit.local.cfg b/mlir/test/Conversion/MathToXeVM/lit.local.cfg
new file mode 100644
index 0000000..cc1ce35
--- /dev/null
+++ b/mlir/test/Conversion/MathToXeVM/lit.local.cfg
@@ -0,0 +1,7 @@
+spirv_backend_tests = [
+ 'native-spirv-builtins.mlir',
+]
+
+# Exclude SPIRV backend tests if SPIRV target is disabled:
+if not config.run_xevm_tests:
+ config.excludes.update(spirv_backend_tests)
diff --git a/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir b/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir
new file mode 100644
index 0000000..c61640c
--- /dev/null
+++ b/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir
@@ -0,0 +1,199 @@
+// RUN: mlir-opt %s -convert-math-to-xevm \
+// RUN: | FileCheck %s -check-prefixes='CHECK,CHECK-ARITH'
+// RUN: mlir-opt %s -convert-math-to-xevm='convert-arith=false' \
+// RUN: | FileCheck %s -check-prefixes='CHECK,CHECK-NO-ARITH'
+
+// RUN: mlir-opt --pass-pipeline="builtin.module(convert-math-to-xevm)" %s \
+// RUN: | FileCheck %s -check-prefixes='CHECK-MODULE,CHECK-ENTIRE-MODULE'
+// RUN: mlir-opt --pass-pipeline="builtin.module(gpu.module(convert-math-to-xevm))" %s \
+// RUN: | FileCheck %s -check-prefixes='CHECK-MODULE,CHECK-ONLY-GPU'
+
+// This test:
+// - checks that MathToXeVM converts fastmath math/arith ops properly;
+// - checks that MathToXeVM handles nested modules while respecting the pass manager.
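+//
+// The callee names below are Itanium-mangled SPIR-V OpenCL builtins
+// (_Z<len><name><types>): Dh = f16, f = f32, d = f64, Dv<N>_<t> = an
+// N-element vector of t, and S_ repeats the previous parameter type.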
+
+module @test_module {
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64
+ //
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv2_d(vector<2xf64>) -> vector<2xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv3_d(vector<3xf64>) -> vector<3xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv4_d(vector<4xf64>) -> vector<4xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv8_d(vector<8xf64>) -> vector<8xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv16_d(vector<16xf64>) -> vector<16xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv16_f(vector<16xf32>) -> vector<16xf32>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv4_Dh(vector<4xf16>) -> vector<4xf16>
+ //
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16
+ // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16
+ // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32
+ // CHECK-DAG: llvm.func @_Z24__spirv_ocl_native_log10d(f64) -> f64
+ // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_powrDhDh(f16, f16) -> f16
+ // CHECK-DAG: llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16
+ // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64
+ // CHECK-ARITH-DAG: llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32
+
+ // CHECK-LABEL: func @math_ops
+ func.func @math_ops() {
+
+ %c1_f16 = arith.constant 1. : f16
+ %c1_f32 = arith.constant 1. : f32
+ %c1_f64 = arith.constant 1. : f64
+
+ // CHECK: math.exp
+ %exp_normal_f16 = math.exp %c1_f16 : f16
+ // CHECK: math.exp
+ %exp_normal_f32 = math.exp %c1_f32 : f32
+ // CHECK: math.exp
+ %exp_normal_f64 = math.exp %c1_f64 : f64
+
+ // Check float operations are converted properly:
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f16) -> f16
+ %exp_fast_f16 = math.exp %c1_f16 fastmath<fast> : f16
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f32) -> f32
+ %exp_fast_f32 = math.exp %c1_f32 fastmath<fast> : f32
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expd(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f64) -> f64
+ %exp_fast_f64 = math.exp %c1_f64 fastmath<fast> : f64
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ %exp_afn_f16 = math.exp %c1_f16 fastmath<afn> : f16
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ %exp_afn_f32 = math.exp %c1_f32 fastmath<afn> : f32
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expd(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ %exp_afn_f64 = math.exp %c1_f64 fastmath<afn> : f64
+
+ // CHECK: math.exp
+ %exp_none_f16 = math.exp %c1_f16 fastmath<none> : f16
+ // CHECK: math.exp
+ %exp_none_f32 = math.exp %c1_f32 fastmath<none> : f32
+ // CHECK: math.exp
+ %exp_none_f64 = math.exp %c1_f64 fastmath<none> : f64
+
+ // Check vector operations:
+
+ %v2_c1_f64 = arith.constant dense<1.> : vector<2xf64>
+ %v3_c1_f64 = arith.constant dense<1.> : vector<3xf64>
+ %v4_c1_f64 = arith.constant dense<1.> : vector<4xf64>
+ %v8_c1_f64 = arith.constant dense<1.> : vector<8xf64>
+ %v16_c1_f64 = arith.constant dense<1.> : vector<16xf64>
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv2_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<2xf64>) -> vector<2xf64>
+ %exp_v2_f64 = math.exp %v2_c1_f64 fastmath<afn> : vector<2xf64>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv3_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<3xf64>) -> vector<3xf64>
+ %exp_v3_f64 = math.exp %v3_c1_f64 fastmath<afn> : vector<3xf64>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv4_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<4xf64>) -> vector<4xf64>
+ %exp_v4_f64 = math.exp %v4_c1_f64 fastmath<afn> : vector<4xf64>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv8_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64>
+ %exp_v8_f64 = math.exp %v8_c1_f64 fastmath<afn> : vector<8xf64>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv16_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf64>) -> vector<16xf64>
+ %exp_v16_f64 = math.exp %v16_c1_f64 fastmath<afn> : vector<16xf64>
+
+ %v16_c1_f32 = arith.constant dense<1.> : vector<16xf32>
+ %v4_c1_f16 = arith.constant dense<1.> : vector<4xf16>
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv16_f(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (vector<16xf32>) -> vector<16xf32>
+ %exp_v16_f32 = math.exp %v16_c1_f32 fastmath<fast> : vector<16xf32>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv4_Dh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (vector<4xf16>) -> vector<4xf16>
+ %exp_v4_f16 = math.exp %v4_c1_f16 fastmath<fast> : vector<4xf16>
+
+ // Check unsupported vector sizes are not converted:
+
+ %v5_c1_f64 = arith.constant dense<1.> : vector<5xf64>
+ %v32_c1_f64 = arith.constant dense<1.> : vector<32xf64>
+
+ // CHECK: math.exp
+ %exp_v5_f64 = math.exp %v5_c1_f64 fastmath<afn> : vector<5xf64>
+ // CHECK: math.exp
+ %exp_v32_f64 = math.exp %v32_c1_f64 fastmath<afn> : vector<32xf64>
+
+ // Check fastmath flags propagate properly:
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f16) -> f16
+ %exp_fastmath_all_f16 = math.exp %c1_f16 fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn> : f16
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<nnan, ninf, nsz, arcp, contract, afn>} : (f32) -> f32
+ %exp_fastmath_most_f32 = math.exp %c1_f32 fastmath<nnan,ninf,nsz,arcp,contract,afn> : f32
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<nnan, afn, reassoc>} : (f32) -> f32
+ %exp_afn_reassoc_nnan_f32 = math.exp %c1_f32 fastmath<afn,reassoc,nnan> : f32
+
+ // Check all other math operations:
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_cosDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ %cos_afn_f16 = math.cos %c1_f16 fastmath<afn> : f16
+
+ // CHECK: llvm.call @_Z23__spirv_ocl_native_exp2f(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ %exp2_afn_f32 = math.exp2 %c1_f32 fastmath<afn> : f32
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_logDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ %log_afn_f16 = math.log %c1_f16 fastmath<afn> : f16
+
+ // CHECK: llvm.call @_Z23__spirv_ocl_native_log2f(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ %log2_afn_f32 = math.log2 %c1_f32 fastmath<afn> : f32
+
+ // CHECK: llvm.call @_Z24__spirv_ocl_native_log10d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ %log10_afn_f64 = math.log10 %c1_f64 fastmath<afn> : f64
+
+ // CHECK: llvm.call @_Z23__spirv_ocl_native_powrDhDh(%{{.*}}, %{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16, f16) -> f16
+ %powr_afn_f16 = math.powf %c1_f16, %c1_f16 fastmath<afn> : f16
+
+ // CHECK: llvm.call @_Z24__spirv_ocl_native_rsqrtd(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ %rsqrt_afn_f64 = math.rsqrt %c1_f64 fastmath<afn> : f64
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_sinDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ %sin_afn_f16 = math.sin %c1_f16 fastmath<afn> : f16
+
+ // CHECK: llvm.call @_Z23__spirv_ocl_native_sqrtf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ %sqrt_afn_f32 = math.sqrt %c1_f32 fastmath<afn> : f32
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_tand(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ %tan_afn_f64 = math.tan %c1_f64 fastmath<afn> : f64
+
+ %c6_9_f32 = arith.constant 6.9 : f32
+ %c7_f32 = arith.constant 7. : f32
+
+ // CHECK-ARITH: llvm.call @_Z25__spirv_ocl_native_divideff(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32
+ // CHECK-NO-ARITH: arith.divf
+ %divf_afn_f32 = arith.divf %c6_9_f32, %c7_f32 fastmath<afn> : f32
+
+ return
+ }
+
+ // Check that MathToXeVM handles nested modules while respecting the pass manager:
+
+ // CHECK-ENTIRE-MODULE: llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32
+ // CHECK-ONLY-GPU-NOT: llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32
+
+ // CHECK-MODULE-LABEL: @test_gpu
+ gpu.module @test_gpu {
+ // CHECK-MODULE: llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32
+ gpu.func @exp_gpu() {
+ %c1_f32 = arith.constant 1. : f32
+
+ // CHECK-MODULE: math.exp
+ %exp_normal_f32 = math.exp %c1_f32 : f32
+
+ // CHECK-MODULE: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ %exp_fast_f32 = math.exp %c1_f32 fastmath<afn> : f32
+
+ gpu.return
+ }
+ }
+
+ // CHECK-MODULE-LABEL: @exp_func
+ func.func @exp_func() {
+ %c1_f32 = arith.constant 1. : f32
+
+ // CHECK-MODULE: math.exp
+ %exp_normal_f32 = math.exp %c1_f32 : f32
+
+ // CHECK-ENTIRE-MODULE: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ // CHECK-ONLY-GPU: math.exp
+ %exp_fast_f32 = math.exp %c1_f32 fastmath<afn> : f32
+
+ return
+ }
+}
diff --git a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
new file mode 100644
index 0000000..82426c4
--- /dev/null
+++ b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
@@ -0,0 +1,119 @@
+// RUN: mlir-opt %s -gpu-module-to-binary="format=isa" \
+// RUN: -debug-only=serialize-to-isa 2> %t
+// RUN: FileCheck --input-file=%t %s
+// REQUIRES: asserts
+//
+// The MathToXeVM pass generates OpenCL intrinsic function calls when
+// converting math ops with the `fastmath` attr to native function calls. It
+// is assumed that the SPIRV backend correctly converts these intrinsic calls
+// to OpenCL ExtInst instructions in SPIRV (see
+// llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp).
+//
+// To ensure this assumption holds, this test verifies that the SPIRV backend
+// behaves as expected.
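+//
+// The SPIRV assembly is recovered from the `serialize-to-isa` debug stream
+// (stderr) while lowering the GPU module to a binary, which is also why this
+// test requires an asserts build.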
+
+module @test_ocl_intrinsics attributes {gpu.container_module} {
+ gpu.module @kernel [#xevm.target] {
+ llvm.func spir_kernelcc @native_fcns() attributes {gpu.kernel} {
+ // CHECK-DAG: %[[F16T:.+]] = OpTypeFloat 16
+ // CHECK-DAG: %[[ZERO_F16:.+]] = OpConstantNull %[[F16T]]
+ %c0_f16 = llvm.mlir.constant(0. : f16) : f16
+ // CHECK-DAG: %[[F32T:.+]] = OpTypeFloat 32
+ // CHECK-DAG: %[[ZERO_F32:.+]] = OpConstantNull %[[F32T]]
+ %c0_f32 = llvm.mlir.constant(0. : f32) : f32
+ // CHECK-DAG: %[[F64T:.+]] = OpTypeFloat 64
+ // CHECK-DAG: %[[ZERO_F64:.+]] = OpConstantNull %[[F64T]]
+ %c0_f64 = llvm.mlir.constant(0. : f64) : f64
+
+ // CHECK-DAG: %[[V2F64T:.+]] = OpTypeVector %[[F64T]] 2
+ // CHECK-DAG: %[[V2_ZERO_F64:.+]] = OpConstantNull %[[V2F64T]]
+ %v2_c0_f64 = llvm.mlir.constant(dense<0.> : vector<2xf64>) : vector<2xf64>
+ // CHECK-DAG: %[[V3F32T:.+]] = OpTypeVector %[[F32T]] 3
+ // CHECK-DAG: %[[V3_ZERO_F32:.+]] = OpConstantNull %[[V3F32T]]
+ %v3_c0_f32 = llvm.mlir.constant(dense<0.> : vector<3xf32>) : vector<3xf32>
+ // CHECK-DAG: %[[V4F64T:.+]] = OpTypeVector %[[F64T]] 4
+ // CHECK-DAG: %[[V4_ZERO_F64:.+]] = OpConstantNull %[[V4F64T]]
+ %v4_c0_f64 = llvm.mlir.constant(dense<0.> : vector<4xf64>) : vector<4xf64>
+ // CHECK-DAG: %[[V8F64T:.+]] = OpTypeVector %[[F64T]] 8
+ // CHECK-DAG: %[[V8_ZERO_F64:.+]] = OpConstantNull %[[V8F64T]]
+ %v8_c0_f64 = llvm.mlir.constant(dense<0.> : vector<8xf64>) : vector<8xf64>
+ // CHECK-DAG: %[[V16F16T:.+]] = OpTypeVector %[[F16T]] 16
+ // CHECK-DAG: %[[V16_ZERO_F16:.+]] = OpConstantNull %[[V16F16T]]
+ %v16_c0_f16 = llvm.mlir.constant(dense<0.> : vector<16xf16>) : vector<16xf16>
+
+ // CHECK: OpExtInst %[[F16T]] %{{.+}} native_exp %[[ZERO_F16]]
+ %exp_f16 = llvm.call @_Z22__spirv_ocl_native_expDh(%c0_f16) : (f16) -> f16
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp %[[ZERO_F32]]
+ %exp_f32 = llvm.call @_Z22__spirv_ocl_native_expf(%c0_f32) : (f32) -> f32
+ // CHECK: OpExtInst %[[F64T]] %{{.+}} native_exp %[[ZERO_F64]]
+ %exp_f64 = llvm.call @_Z22__spirv_ocl_native_expd(%c0_f64) : (f64) -> f64
+
+ // CHECK: OpExtInst %[[V2F64T]] %{{.+}} native_exp %[[V2_ZERO_F64]]
+ %exp_v2_f64 = llvm.call @_Z22__spirv_ocl_native_expDv2_f64(%v2_c0_f64) : (vector<2xf64>) -> vector<2xf64>
+ // CHECK: OpExtInst %[[V3F32T]] %{{.+}} native_exp %[[V3_ZERO_F32]]
+ %exp_v3_f32 = llvm.call @_Z22__spirv_ocl_native_expDv3_f32(%v3_c0_f32) : (vector<3xf32>) -> vector<3xf32>
+ // CHECK: OpExtInst %[[V4F64T]] %{{.+}} native_exp %[[V4_ZERO_F64]]
+ %exp_v4_f64 = llvm.call @_Z22__spirv_ocl_native_expDv4_f64(%v4_c0_f64) : (vector<4xf64>) -> vector<4xf64>
+ // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_exp %[[V8_ZERO_F64]]
+ %exp_v8_f64 = llvm.call @_Z22__spirv_ocl_native_expDv8_f64(%v8_c0_f64) : (vector<8xf64>) -> vector<8xf64>
+ // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_exp %[[V16_ZERO_F16]]
+ %exp_v16_f16 = llvm.call @_Z22__spirv_ocl_native_expDv16_f16(%v16_c0_f16) : (vector<16xf16>) -> vector<16xf16>
+
+ // The SPIRV backend does not currently handle fastmath flags: it would
+ // need to generate OpDecorate instructions to decorate math ops with
+ // FPFastMathMode/FPFastMathModeINTEL decorations.
+ //
+ // FIXME: When support for fastmath flags in the SPIRV backend is added,
+ // add tests here to ensure fastmath flags are converted to the correct
+ // OpDecorate calls.
+ //
+ // See:
+ // - https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_math_extended_instructions
+ // - https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpDecorate
+
+ // CHECK: OpExtInst %[[F16T]] %{{.+}} native_cos %[[ZERO_F16]]
+ %cos_afn_f16 = llvm.call @_Z22__spirv_ocl_native_cosDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp2 %[[ZERO_F32]]
+ %exp2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_exp2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ // CHECK: OpExtInst %[[F16T]] %{{.+}} native_log %[[ZERO_F16]]
+ %log_afn_f16 = llvm.call @_Z22__spirv_ocl_native_logDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_log2 %[[ZERO_F32]]
+ %log2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_log2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_log10 %[[V8_ZERO_F64]]
+ %log10_afn_f64 = llvm.call @_Z24__spirv_ocl_native_log10Dv8_d(%v8_c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64>
+ // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_powr %[[V16_ZERO_F16]] %[[V16_ZERO_F16]]
+ %powr_afn_f16 = llvm.call @_Z23__spirv_ocl_native_powrDv16_DhS_(%v16_c0_f16, %v16_c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf16>, vector<16xf16>) -> vector<16xf16>
+ // CHECK: OpExtInst %[[F64T]] %{{.+}} native_rsqrt %[[ZERO_F64]]
+ %rsqrt_afn_f64 = llvm.call @_Z24__spirv_ocl_native_rsqrtd(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ // CHECK: OpExtInst %[[F16T]] %{{.+}} native_sin %[[ZERO_F16]]
+ %sin_afn_f16 = llvm.call @_Z22__spirv_ocl_native_sinDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_sqrt %[[ZERO_F32]]
+ %sqrt_afn_f32 = llvm.call @_Z23__spirv_ocl_native_sqrtf(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ // CHECK: OpExtInst %[[F64T]] %{{.+}} native_tan %[[ZERO_F64]]
+ %tan_afn_f64 = llvm.call @_Z22__spirv_ocl_native_tand(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_divide %[[ZERO_F32]] %[[ZERO_F32]]
+ %divide_afn_f32 = llvm.call @_Z25__spirv_ocl_native_divideff(%c0_f32, %c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32
+
+ llvm.return
+ }
+
+ llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16
+ llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32
+ llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64
+ llvm.func @_Z22__spirv_ocl_native_expDv2_f64(vector<2xf64>) -> vector<2xf64>
+ llvm.func @_Z22__spirv_ocl_native_expDv3_f32(vector<3xf32>) -> vector<3xf32>
+ llvm.func @_Z22__spirv_ocl_native_expDv4_f64(vector<4xf64>) -> vector<4xf64>
+ llvm.func @_Z22__spirv_ocl_native_expDv8_f64(vector<8xf64>) -> vector<8xf64>
+ llvm.func @_Z22__spirv_ocl_native_expDv16_f16(vector<16xf16>) -> vector<16xf16>
+ llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16
+ llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32
+ llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16
+ llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32
+ llvm.func @_Z24__spirv_ocl_native_log10Dv8_d(vector<8xf64>) -> vector<8xf64>
+ llvm.func @_Z23__spirv_ocl_native_powrDv16_DhS_(vector<16xf16>, vector<16xf16>) -> vector<16xf16>
+ llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64
+ llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16
+ llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32
+ llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64
+ llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32
+ }
+}
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index a7a73ae..780c25a 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -1538,6 +1538,92 @@ func.func @unsupportedRescaleInexactRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8>
// -----
+// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)>
+// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()>
+// CHECK-LABEL: @rescale_no_const
+// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
+func.func @rescale_no_const(%arg0 : tensor<2xi8>, %multiplier : tensor<1xi32>, %shift : tensor<1xi8>, %input_zp : tensor<1xi8>, %output_zp : tensor<1xi8>) -> (tensor<2xi8>) {
+ // CHECK: [[MULTIPLIER:%.+]] = tensor.collapse_shape %arg1 [] : tensor<1xi32> into tensor<i32>
+ // CHECK: [[SHIFT:%.+]] = tensor.collapse_shape %arg2 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xi8>
+ // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[MULTIPLIER]], [[SHIFT]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<i32>, tensor<i8>, tensor<i8>, tensor<i8>) outs([[INIT]] : tensor<2xi8>) {
+ // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: i8, [[ARG4:%.*]]: i8, [[OUT:%.*]]: i8):
+ // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extsi [[ARG3]] : i8 to i32
+ // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extsi [[ARG4]] : i8 to i32
+ // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32
+ // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32
+ // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32
+ // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32
+ // CHECK: %c-128_i32 = arith.constant -128 : i32
+ // CHECK: %c127_i32 = arith.constant 127 : i32
+ // CHECK: [[MAX:%.+]] = arith.maxsi %c-128_i32, [[TMP3]] : i32
+ // CHECK: [[MIN:%.+]] = arith.minsi %c127_i32, [[MAX]] : i32
+ %0 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = false, input_unsigned = false, output_unsigned = false} : (tensor<2xi8>, tensor<1xi32>, tensor<1xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8>
+ return %0 : tensor<2xi8>
+}
+
+// -----
+
+// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)>
+// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()>
+// CHECK-LABEL: @rescale_no_const_per_channel
+// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
+// CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]]
+// CHECK-SAME: [[ARG2:%[0-9a-zA-Z_]*]]
+func.func @rescale_no_const_per_channel(%arg0 : tensor<2xi8>, %arg1 : tensor<2xi32>, %arg2 : tensor<2xi8>, %input_zp : tensor<1xi8>, %output_zp : tensor<1xi8>) -> (tensor<2xi8>) {
+ // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xi8>
+ // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[ARG1]], [[ARG2]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<i8>, tensor<i8>) outs([[INIT]] : tensor<2xi8>) {
+ // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: i8, [[ARG4:%.*]]: i8, [[OUT:%.*]]: i8):
+ // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extsi [[ARG3]] : i8 to i32
+ // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extsi [[ARG4]] : i8 to i32
+ // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32
+ // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32
+ // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32
+ // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32
+ // CHECK: %c-128_i32 = arith.constant -128 : i32
+ // CHECK: %c127_i32 = arith.constant 127 : i32
+ // CHECK: [[MAX:%.+]] = arith.maxsi %c-128_i32, [[TMP3]] : i32
+ // CHECK: [[MIN:%.+]] = arith.minsi %c127_i32, [[MAX]] : i32
+ %0 = tosa.rescale %arg0, %arg1, %arg2, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = true, input_unsigned = false, output_unsigned = false} : (tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8>
+ return %0 : tensor<2xi8>
+}
+
+// -----
+
+// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)>
+// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()>
+// CHECK-LABEL: @rescale_no_const_per_channel_input_output_zp_ui8
+// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
+// CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]]
+// CHECK-SAME: [[ARG2:%[0-9a-zA-Z_]*]]
+func.func @rescale_no_const_per_channel_input_output_zp_ui8(%arg0 : tensor<2xi8>, %arg1 : tensor<2xi32>, %arg2 : tensor<2xi8>, %input_zp : tensor<1xui8>, %output_zp : tensor<1xui8>) -> (tensor<2xui8>) {
+ // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xui8> into tensor<ui8>
+ // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xui8> into tensor<ui8>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xui8>
+ // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[ARG1]], [[ARG2]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<ui8>, tensor<ui8>) outs([[INIT]] : tensor<2xui8>) {
+ // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: ui8, [[ARG4:%.*]]: ui8, [[OUT:%.*]]: ui8):
+ // CHECK: [[INPUT_ZP_I8:%.+]] = builtin.unrealized_conversion_cast [[ARG3]] : ui8 to i8
+ // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extui [[INPUT_ZP_I8]] : i8 to i32
+ // CHECK: [[OUTPUT_ZP_I8:%.+]] = builtin.unrealized_conversion_cast [[ARG4]] : ui8 to i8
+ // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extui [[OUTPUT_ZP_I8]] : i8 to i32
+ // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32
+ // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32
+ // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32
+ // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32
+ // CHECK: %c0_i32 = arith.constant 0 : i32
+ // CHECK: %c255_i32 = arith.constant 255 : i32
+ // CHECK: [[MAX:%.+]] = arith.maxsi %c0_i32, [[TMP3]] : i32
+ // CHECK: [[MIN:%.+]] = arith.minsi %c255_i32, [[MAX]] : i32
+ %0 = tosa.rescale %arg0, %arg1, %arg2, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = true, input_unsigned = false, output_unsigned = true} : (tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<1xui8>, tensor<1xui8>) -> tensor<2xui8>
+ return %0 : tensor<2xui8>
+}
+
+// -----
+
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: @reverse
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 2d33888..d669a3b 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -76,6 +76,18 @@ func.func @broadcast_vec1d_from_f32(%arg0: f32) -> vector<2xf32> {
// -----
+func.func @broadcast_single_elem_vec1d_from_f32(%arg0: f32) -> vector<1xf32> {
+ %0 = vector.broadcast %arg0 : f32 to vector<1xf32>
+ return %0 : vector<1xf32>
+}
+// CHECK-LABEL: @broadcast_single_elem_vec1d_from_f32
+// CHECK-SAME: %[[A:.*]]: f32)
+// CHECK: %[[T0:.*]] = llvm.insertelement %[[A]]
+// CHECK-NOT: llvm.shufflevector
+// CHECK: return %[[T0]] : vector<1xf32>
+
+// -----
+
func.func @broadcast_vec1d_from_f32_scalable(%arg0: f32) -> vector<[2]xf32> {
%0 = vector.broadcast %arg0 : f32 to vector<[2]xf32>
return %0 : vector<[2]xf32>
diff --git a/mlir/test/Conversion/XeGPUToXeVM/dpas.mlir b/mlir/test/Conversion/XeGPUToXeVM/dpas.mlir
index e6f22f0..a9ab0be 100644
--- a/mlir/test/Conversion/XeGPUToXeVM/dpas.mlir
+++ b/mlir/test/Conversion/XeGPUToXeVM/dpas.mlir
@@ -1,17 +1,13 @@
// RUN: mlir-opt -convert-xegpu-to-xevm %s | FileCheck %s
-#sg_map_a_f16 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
-#sg_map_b_f16 = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>
-#sg_map_c_f32 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
-
-gpu.module @load_store_check {
+gpu.module @test_kernel {
// CHECK-LABEL: func.func @dpas(
// CHECK-SAME: %[[ARG0:.*]]: vector<8xf16>, %[[ARG1:.*]]: vector<16xf16>, %[[ARG2:.*]]: vector<8xf32>
func.func @dpas(%a_loaded: vector<8xf16>, %b_loaded: vector<16xf16>, %c_loaded: vector<8xf32>) -> vector<8xf32> {
// Loads are checked in a separate test.
// CHECK: %[[D:.*]] = xevm.mma %[[ARG0]], %[[ARG1]], %[[ARG2]] {shape = <m = 8, n = 16, k = 16>, types = <d = f32, a = f16, b = f16, c = f32>}
// CHECK-SAME: : (vector<8xf16>, vector<16xf16>, vector<8xf32>) -> vector<8xf32>
- %d = xegpu.dpas %a_loaded, %b_loaded, %c_loaded {a_layout = #sg_map_a_f16, b_layout = #sg_map_b_f16, c_layout = #sg_map_c_f32}
+ %d = xegpu.dpas %a_loaded, %b_loaded, %c_loaded
: vector<8xf16>, vector<16xf16>, vector<8xf32> -> vector<8xf32>
return %d : vector<8xf32>
}
diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstore_matrix.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstore_matrix.mlir
new file mode 100644
index 0000000..d4cb493
--- /dev/null
+++ b/mlir/test/Conversion/XeGPUToXeVM/loadstore_matrix.mlir
@@ -0,0 +1,201 @@
+// RUN: mlir-opt -split-input-file -convert-xegpu-to-xevm -cse %s | FileCheck %s
+
+gpu.module @test_kernel [#xevm.target<chip = "pvc">] {
+
+ // e.g. for mem_desc<32x32xf32> (default row-major layout),
+ // its memory layout tuple is (blocked_shape = [1,1,32,32], strides = [1024,1024,32,1])
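+ // In general, for a blocked layout with shape [n0,n1,b0,b1] and strides
+ // [S0,S1,s0,s1], an access (i, j) linearizes to
+ // (i/b0)*S0 + (j/b1)*S1 + (i%b0)*s0 + (j%b1)*s1 elements, which the
+ // lowering then scales by the element size to obtain a byte offset.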
+ //CHECK-LABEL: load_store_matrix_1
+ gpu.func @load_store_matrix_1(%arg0: memref<4096xi8, 3>) -> f32 {
+ %0 = xegpu.create_mem_desc %arg0 : memref<4096xi8, 3> -> !xegpu.mem_desc<32x32xf32>
+
+ //CHECK: %[[TID:.*]] = gpu.thread_id x
+ //CHECK: %[[C1:.*]] = arith.constant 1 : index
+ //CHECK: %[[MUL1:.*]] = arith.muli %[[TID]], %[[C1]] : index
+ //CHECK: %[[C4:.*]] = arith.constant 4 : i32
+ //CHECK: %[[MUL2:.*]] = arith.muli {{.*}}, %[[C4]] : i32
+ //CHECK: llvm.load {{.*}} : !llvm.ptr<3> -> f32
+
+ %tid_x = gpu.thread_id x
+ %c0 = arith.constant 0 : index
+ %1 = xegpu.load_matrix %0[%c0, %tid_x]: !xegpu.mem_desc<32x32xf32>, index, index -> f32
+
+ //CHECK: llvm.store {{.*}}, {{.*}} : f32, !llvm.ptr<3>
+
+ xegpu.store_matrix %1, %0[%c0, %tid_x]: f32, !xegpu.mem_desc<32x32xf32>, index, index
+
+ gpu.return %1: f32
+ }
+
+ // e.g. for mem_desc<32x64xf16, @block=[16, 16], @strides=[1, 32]>
+ // its memory layout tuple is ([2,4,16,16], [256,512,1,16])
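+ // For the access (13, %tid_x) below this gives
+ // (13/16)*256 + (%tid_x/16)*512 + (13%16)*1 + (%tid_x%16)*16, which is the
+ // div/rem/mul/add chain checked here.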
+ //CHECK-LABEL: load_store_matrix_2
+ gpu.func @load_store_matrix_2(%arg0: memref<4096xi8, 3>) -> f16 {
+ %0 = xegpu.create_mem_desc %arg0 : memref<4096xi8, 3> -> !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<stride = [1, 32], block = [16, 16]>>
+ //CHECK: %[[c0:.*]] = arith.constant 0 : index
+ //CHECK: %[[tid_x:.*]] = gpu.thread_id x
+ //CHECK: %[[c13:.*]] = arith.constant 13 : index
+ //CHECK: %[[c16:.*]] = arith.constant 16 : index
+ //CHECK: %[[offsetx_0:.*]] = arith.divsi %[[c13]], %[[c16]] : index
+ //CHECK: %[[offsetx_1:.*]] = arith.remsi %[[c13]], %[[c16]] : index
+ //CHECK: %[[offsety_0:.*]] = arith.divsi %[[tid_x]], %[[c16]] : index
+ //CHECK: %[[offsety_1:.*]] = arith.remsi %[[tid_x]], %[[c16]] : index
+
+ //CHECK: %[[c256:.*]] = arith.constant 256 : index
+ //CHECK: %[[mul0:.*]] = arith.muli %[[offsetx_0]], %[[c256]] : index
+ //CHECK: %[[add0:.*]] = arith.addi %[[mul0]], %[[c0]] : index
+ //CHECK: %[[c512:.*]] = arith.constant 512 : index
+ //CHECK: %[[mul1:.*]] = arith.muli %[[offsety_0]], %[[c512]] : index
+ //CHECK: %[[add1:.*]] = arith.addi %[[mul1]], %[[add0]] : index
+ //CHECK: %[[c1:.*]] = arith.constant 1 : index
+ //CHECK: %[[mul2:.*]] = arith.muli %[[offsetx_1]], %[[c1]] : index
+ //CHECK: %[[add2:.*]] = arith.addi %[[mul2]], %[[add1]] : index
+ //CHECK: %[[mul3:.*]] = arith.muli %[[offsety_1]], %[[c16]] : index
+ //CHECK: %[[add3:.*]] = arith.addi %[[mul3]], %[[add2]] : index
+
+ //CHECK: %[[loaded:.*]] = llvm.load {{.*}}: !llvm.ptr<3> -> f16
+
+
+ %tid_x = gpu.thread_id x
+ %c13 = arith.constant 13 : index
+ %1 = xegpu.load_matrix %0[%c13, %tid_x]: !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<stride = [1, 32], block = [16, 16]>>, index, index -> f16
+
+ //CHECK: llvm.store %[[loaded]], {{.*}} : f16, !llvm.ptr<3>
+
+ xegpu.store_matrix %1, %0[%c13, %tid_x]: f16, !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<stride = [1, 32], block = [16, 16]>>, index, index
+ gpu.return %1: f16
+ }
+
+
+ // e.g. for mem_desc<32x64xf16, @block=[16, 16]>
+ // its memory layout tuple is ([2,4,16,16],[1024,256,16,1])
+ //CHECK-LABEL: load_store_matrix_3
+ gpu.func @load_store_matrix_3(%arg0: memref<4096xi8, 3>) -> f16 {
+ //CHECK: %[[c0:.*]] = arith.constant 0 : index
+ //CHECK: %[[view:.*]] = memref.view %arg0[%[[c0]]][] : memref<4096xi8, 3> to memref<2048xf16, 3>
+ %0 = xegpu.create_mem_desc %arg0 : memref<4096xi8, 3> -> !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<block = [16, 16]>>
+
+ //CHECK: %[[tid_x:.*]] = gpu.thread_id x
+ //CHECK: %[[c19:.*]] = arith.constant 19 : index
+ %tid_x = gpu.thread_id x
+ %c19 = arith.constant 19: index
+
+ //CHECK: %[[intptr:.*]] = memref.extract_aligned_pointer_as_index %[[view]] : memref<2048xf16, 3> -> index
+ //CHECK: %[[basePtrI64:.*]] = arith.index_castui %[[intptr]] : index to i32
+ //CHECK: %[[c16:.*]] = arith.constant 16 : index
+ //CHECK: %[[offsetx_0:.*]] = arith.divsi %[[c19]], %[[c16]] : index
+ //CHECK: %[[offsetx_1:.*]] = arith.remsi %[[c19]], %[[c16]] : index
+ //CHECK: %[[offsety_0:.*]] = arith.divsi %[[tid_x]], %[[c16]] : index
+ //CHECK: %[[offsety_1:.*]] = arith.remsi %[[tid_x]], %[[c16]] : index
+ //CHECK: %[[c1024:.*]] = arith.constant 1024 : index
+ //CHECK: %[[mul0:.*]] = arith.muli %[[offsetx_0]], %[[c1024]] : index
+ //CHECK: %[[add0:.*]] = arith.addi %[[mul0]], %[[c0]] : index
+ //CHECK: %[[c256:.*]] = arith.constant 256 : index
+ //CHECK: %[[mul1:.*]] = arith.muli %[[offsety_0]], %[[c256]] : index
+ //CHECK: %[[add1:.*]] = arith.addi %[[mul1]], %[[add0]] : index
+ //CHECK: %[[mul2:.*]] = arith.muli %[[offsetx_1]], %[[c16]] : index
+ //CHECK: %[[add2:.*]] = arith.addi %[[mul2]], %[[add1]] : index
+ //CHECK: %[[c1:.*]] = arith.constant 1 : index
+ //CHECK: %[[mul3:.*]] = arith.muli %[[offsety_1]], %[[c1]] : index
+ //CHECK: %[[add3:.*]] = arith.addi %[[mul3]], %[[add2]] : index
+
+ //CHECK: %[[loaded:.*]] = llvm.load {{.*}} : !llvm.ptr<3> -> f16
+ %1 = xegpu.load_matrix %0[%c19, %tid_x]: !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<block = [16, 16]>>, index, index -> f16
+
+ //CHECK: llvm.store %[[loaded]], {{.*}} : f16, !llvm.ptr<3>
+ xegpu.store_matrix %1, %0[%c19, %tid_x]: f16, !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<block = [16, 16]>>, index, index
+
+ //CHECK: gpu.return %[[loaded]] : f16
+ gpu.return %1: f16
+ }
+
+ // e.g. for mem_desc<32x64xf16, @block=[16, 16], @strides=[1, 32]>
+ // its memory layout tuple is ([2,4,16,16],[256,512,1,16])
+ //CHECK-LABEL: load_store_matrix_4
+ gpu.func @load_store_matrix_4(%arg0: memref<4096xi8, 3>) -> vector<8xf16> {
+ %0 = xegpu.create_mem_desc %arg0 : memref<4096xi8, 3> -> !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<stride = [1, 32], block = [16, 16]>>
+
+ //CHECK: %[[c0:.*]] = arith.constant 0 : index
+ //CHECK: %[[tid_x:.*]] = gpu.thread_id x
+
+ //CHECK: %[[c16:.*]] = arith.constant 16 : index
+ //CHECK: %[[offsetx_0:.*]] = arith.divsi %[[c16]], %[[c16]] : index
+ //CHECK: %[[offsetx_1:.*]] = arith.remsi %[[c16]], %[[c16]] : index
+ //CHECK: %[[offsety_0:.*]] = arith.divsi %[[tid_x]], %[[c16]] : index
+ //CHECK: %[[offsety_1:.*]] = arith.remsi %[[tid_x]], %[[c16]] : index
+
+ //CHECK: %[[c256:.*]] = arith.constant 256 : index
+ //CHECK: %[[mul0:.*]] = arith.muli %[[offsetx_0]], %[[c256]] : index
+ //CHECK: %[[add0:.*]] = arith.addi %[[mul0]], %[[c0]] : index
+ //CHECK: %[[c512:.*]] = arith.constant 512 : index
+ //CHECK: %[[mul1:.*]] = arith.muli %[[offsety_0]], %[[c512]] : index
+ //CHECK: %[[add1:.*]] = arith.addi %[[mul1]], %[[add0]] : index
+ //CHECK: %[[c1:.*]] = arith.constant 1 : index
+ //CHECK: %[[mul2:.*]] = arith.muli %[[offsetx_1]], %[[c1]] : index
+ //CHECK: %[[add2:.*]] = arith.addi %[[mul2]], %[[add1]] : index
+ //CHECK: %[[mul3:.*]] = arith.muli %[[offsety_1]], %[[c16]] : index
+ //CHECK: %[[add3:.*]] = arith.addi %[[mul3]], %[[add2]] : index
+
+ //CHECK: %[[loaded:.*]] = llvm.load {{.*}}: !llvm.ptr<3> -> vector<8xf16>
+
+ %tid_x = gpu.thread_id x
+ %c16 = arith.constant 16 : index
+ %1 = xegpu.load_matrix %0[%c16, %tid_x] : !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<stride = [1, 32], block = [16, 16]>>, index, index -> vector<8xf16>
+
+ //CHECK: llvm.store %[[loaded]], {{.*}} : vector<8xf16>, !llvm.ptr<3>
+ xegpu.store_matrix %1, %0[%c16, %tid_x] : vector<8xf16>, !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<stride = [1, 32], block = [16, 16]>>, index, index
+
+ gpu.return %1: vector<8xf16>
+ }
+
+
+ // e.g. for mem_desc<32x64xf16, @block=[16, 16]>
+ // its memory layout tuple is ([2,4,16,16],[1024,256,16,1])
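+ // With the subgroup_block_io attribute the accesses lower to
+ // xevm.blockload/xevm.blockstore, which here transfer vector<8xi16>, hence
+ // the f16 <-> i16 bitcasts in the CHECK lines below.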
+ //CHECK-LABEL: load_store_matrix_5
+ gpu.func @load_store_matrix_5(%arg0: memref<4096xi8, 3>) -> vector<8xf16> {
+ //CHECK: %[[c0:.*]] = arith.constant 0 : index
+ //CHECK: %[[view:.*]] = memref.view %arg0[%[[c0]]][] : memref<4096xi8, 3> to memref<2048xf16, 3>
+
+ %0 = xegpu.create_mem_desc %arg0 : memref<4096xi8, 3> -> !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<block = [16, 16]>>
+
+ //CHECK: %[[c16:.*]] = arith.constant 16 : index
+ //CHECK: %[[c48:.*]] = arith.constant 48 : index
+
+ %c16 = arith.constant 16 : index
+ %c48 = arith.constant 48 : index
+
+ //CHECK: %[[intptr:.*]] = memref.extract_aligned_pointer_as_index %[[view]] : memref<2048xf16, 3> -> index
+ //CHECK: %[[basePtrI64:.*]] = arith.index_castui %[[intptr]] : index to i32
+ //CHECK: %[[offset0:.*]] = arith.divsi %[[c16]], %[[c16]] : index
+ //CHECK: %[[offset1:.*]] = arith.remsi %[[c16]], %[[c16]] : index
+ //CHECK: %[[offset2:.*]] = arith.divsi %[[c48]], %[[c16]] : index
+ //CHECK: %[[offset3:.*]] = arith.remsi %[[c48]], %[[c16]] : index
+ //CHECK: %[[c1024:.*]] = arith.constant 1024 : index
+ //CHECK: %[[mul0:.*]] = arith.muli %[[offset0]], %[[c1024]] : index
+ //CHECK: %[[add0:.*]] = arith.addi %[[mul0]], %[[c0]] : index
+ //CHECK: %[[c256:.*]] = arith.constant 256 : index
+ //CHECK: %[[mul1:.*]] = arith.muli %[[offset2]], %[[c256]] : index
+ //CHECK: %[[add1:.*]] = arith.addi %[[mul1]], %[[add0]] : index
+ //CHECK: %[[mul2:.*]] = arith.muli %[[offset1]], %[[c16]] : index
+ //CHECK: %[[add2:.*]] = arith.addi %[[mul2]], %[[add1]] : index
+ //CHECK: %[[c1:.*]] = arith.constant 1 : index
+ //CHECK: %[[mul3:.*]] = arith.muli %[[offset3]], %[[c1]] : index
+ //CHECK: %[[linearOffset:.*]] = arith.addi %[[mul3]], %[[add2]] : index
+ //CHECK: %[[linearOffsetI32:.*]] = arith.index_castui %[[linearOffset]] : index to i32
+ //CHECK: %[[c2:.*]] = arith.constant 2 : i32
+ //CHECK: %[[byteOffset:.*]] = arith.muli %[[linearOffsetI32]], %[[c2]] : i32
+ //CHECK: %[[finalPtr:.*]] = arith.addi %[[basePtrI32]], %[[byteOffset]] : i32
+ //CHECK: %[[ptr:.*]] = llvm.inttoptr %[[finalPtr]] : i32 to !llvm.ptr<3>
+ //CHECK: %[[loadedI16:.*]] = xevm.blockload %[[ptr]] : (!llvm.ptr<3>) -> vector<8xi16>
+ //CHECK: %[[loaded:.*]] = vector.bitcast %[[loadedI16]] : vector<8xi16> to vector<8xf16>
+
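+ // With {subgroup_block_io} the access lowers to xevm.blockload/blockstore,
+ // and the f16 payload is bitcast to and from i16 vectors around the block ops.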
+ %1 = xegpu.load_matrix %0[%c16, %c48] {subgroup_block_io}: !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<block = [16, 16]>>, index, index -> vector<8xf16>
+
+ //CHECK: %[[storeDataI16:.*]] = vector.bitcast %[[loaded]] : vector<8xf16> to vector<8xi16>
+ //CHECK: xevm.blockstore %[[ptr]], %[[storeDataI16]] : (!llvm.ptr<3>, vector<8xi16>)
+
+ xegpu.store_matrix %1, %0[%c16, %c48] {subgroup_block_io}: vector<8xf16>, !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<block = [16, 16]>>, index, index
+
+ gpu.return %1: vector<8xf16>
+ }
+
+}
diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
index 0b150e9..9c552d8 100644
--- a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
+++ b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
@@ -14,19 +14,36 @@ gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>)
// CHECK: %[[VAR4:.*]] = arith.addi %[[ARG0]], %[[VAR3]] : i64
// CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR4]] : i64 to !llvm.ptr<1>
// CHECK: %[[VAR6:.*]] = scf.if %[[VAR2]] -> (f16) {
- // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> vector<1xf16>
- // CHECK: %[[VAR8:.*]] = vector.extract %[[VAR7]][0] : f16 from vector<1xf16>
- // CHECK: scf.yield %[[VAR8]] : f16
- // CHECK: } else {
- // CHECK: %[[CST_0:.*]] = arith.constant dense<0.000000e+00> : vector<1xf16>
- // CHECK: %[[VAR7:.*]] = vector.extract %[[CST_0]][0] : f16 from vector<1xf16>
+ // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> f16
// CHECK: scf.yield %[[VAR7]] : f16
+ // CHECK: } else {
+ // CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f16
+ // CHECK: scf.yield %[[CST_0]] : f16
// CHECK: }
%3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
: i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
gpu.return
}
}
+
+// -----
+gpu.module @test {
+// CHECK-LABEL: @source_materialize_single_elem_vec
+// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>, %[[ARG2:.*]]: memref<1xf16>
+gpu.func @source_materialize_single_elem_vec(%src: i64, %offset: vector<1xindex>, %dst: memref<1xf16>) {
+ %1 = arith.constant dense<1>: vector<1xi1>
+ %3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+ : i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
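+ // The lowering now yields a scalar f16 from the scf.if; the broadcast below
+ // re-materializes the vector<1xf16> expected by vector.store.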
+ // CHECK: %[[VAR_IF:.*]] = scf.if
+ // CHECK: %[[VAR_RET:.*]] = vector.broadcast %[[VAR_IF]] : f16 to vector<1xf16>
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: vector.store %[[VAR_RET]], %[[ARG2]][%[[C0]]] : memref<1xf16>, vector<1xf16>
+ %c0 = arith.constant 0 : index
+ vector.store %3, %dst[%c0] : memref<1xf16>, vector<1xf16>
+ gpu.return
+}
+}
+
// -----
gpu.module @test {
diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir
index 66e7dd4..a8256b1 100644
--- a/mlir/test/Dialect/AMDGPU/invalid.mlir
+++ b/mlir/test/Dialect/AMDGPU/invalid.mlir
@@ -238,3 +238,27 @@ func.func @gather_to_lds_non_lds(%idx1 : index, %mem1 : memref<32xf16>, %mem2 :
amdgpu.gather_to_lds %mem1[%idx1], %mem2[%idx1] : vector<2xf16>, memref<32xf16>, memref<32xf16, strided<[?]>, #gpu.address_space<workgroup>>
func.return
}
+
+// -----
+
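+// blockSize(16) permits firstScaleByte 0 or 1, and blockSize(32) permits 0 or
+// 2; the cases below violate these constraints or the shape-match requirement.
+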
+func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_16(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) {
+ // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 16 can only have firstScaleByte be 0 or 1.}}
+ %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(2) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16>
+ func.return
+}
+
+// -----
+
+func.func @amdgpu.scaled_ext_packed816_invalid_block_size_and_first_scale_byte_32(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) {
+ // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op blockSize of 32 can only have firstScaleByte be 0 or 2.}}
+ %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(1) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16>
+ func.return
+}
+
+// -----
+
+func.func @amdgpu.scaled_ext_packed816_invalid_input_output_sizes(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) {
+ // expected-error@+1 {{'amdgpu.scaled_ext_packed816' op failed to verify that all of {source, res} have same shape}}
+ %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(16) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<16xf16>
+ func.return
+}
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 8f427e9..f9c6899 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -221,6 +221,61 @@ func.func @scaled_ext_scalar_f4e2m1_bf16(%v: vector<2xf4E2M1FN>, %scale: f32) ->
func.return %ret : vector<2xbf16>
}
+// CHECK-LABEL: func.func @scaled_ext_packed816_fp4
+func.func @scaled_ext_packed816_fp4(%v: vector<8xf4E2M1FN>, %scale: vector<4xf8E8M0FNU>) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xbf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf4E2M1FN>, vector<4xf8E8M0FNU> -> vector<8xf32>
+ func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed816_fp8
+func.func @scaled_ext_packed816_fp8(%v: vector<8xf8E4M3FN>, %scale: vector<4xf8E8M0FNU>) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E4M3FN>, vector<4xf8E8M0FNU> -> vector<8xf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E4M3FN>, vector<4xf8E8M0FNU> -> vector<8xbf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E4M3FN>, vector<4xf8E8M0FNU> -> vector<8xf32>
+ func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed816_bf8
+func.func @scaled_ext_packed816_bf8(%v: vector<8xf8E5M2>, %scale: vector<4xf8E8M0FNU>) -> (vector<8xf16>, vector<8xbf16>, vector<8xf32>) {
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xbf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<8xf8E5M2>, vector<4xf8E8M0FNU> -> vector<8xf32>
+ func.return %ret0, %ret1, %ret2 : vector<8xf16>, vector<8xbf16>, vector<8xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed816_fp6
+func.func @scaled_ext_packed816_fp6(%v: vector<16xf6E2M3FN>, %scale: vector<4xf8E8M0FNU>) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E2M3FN>, vector<4xf8E8M0FNU> -> vector<16xf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E2M3FN>, vector<4xf8E8M0FNU> -> vector<16xbf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E2M3FN>, vector<4xf8E8M0FNU> -> vector<16xf32>
+ func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
+}
+
+// CHECK-LABEL: func.func @scaled_ext_packed816_bf6
+func.func @scaled_ext_packed816_bf6(%v: vector<16xf6E3M2FN>, %scale: vector<4xf8E8M0FNU>) -> (vector<16xf16>, vector<16xbf16>, vector<16xf32>) {
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret0 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E3M2FN>, vector<4xf8E8M0FNU> -> vector<16xf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret1 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E3M2FN>, vector<4xf8E8M0FNU> -> vector<16xbf16>
+ // CHECK: amdgpu.scaled_ext_packed816
+ %ret2 = amdgpu.scaled_ext_packed816 %v scale(%scale) blockSize(32) firstScaleLane(0) firstScaleByte(0) : vector<16xf6E3M2FN>, vector<4xf8E8M0FNU> -> vector<16xf32>
+ func.return %ret0, %ret1, %ret2 : vector<16xf16>, vector<16xbf16>, vector<16xf32>
+}
+
// CHECK-LABEL: func.func @packed_scaled_trunc_f8e4m3_f32
// CHECK: amdgpu.packed_scaled_trunc
func.func @packed_scaled_trunc_f8e4m3_f32(%v: vector<2xf32>, %scale: f32) -> vector<4xf8E4M3FN> {
diff --git a/mlir/test/Dialect/AMX/legalize-for-llvm.mlir b/mlir/test/Dialect/AMX/legalize-for-llvm.mlir
index 7e562b00..a109f42 100644
--- a/mlir/test/Dialect/AMX/legalize-for-llvm.mlir
+++ b/mlir/test/Dialect/AMX/legalize-for-llvm.mlir
@@ -60,30 +60,74 @@ func.func @mulfp16(%arg0: memref<?x?xf16>, %arg1: memref<?x?xf32>) {
return
}
-// CHECK-LABEL: strides(
-// CHECK: %[[CST_64_1:.+]] = llvm.mlir.constant(64 : i64) : i64
-// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_64_1]]
-// CHECK: %[[CST_128_1:.+]] = llvm.mlir.constant(128 : i64) : i64
-// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_128_1]]
-// CHECK: llvm.mlir.constant(2 : i64) : i64
+/// Intrinsics require stride in number of bytes.
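+/// e.g. memref<16x32xi8> yields a 32-byte stride, strided<[64, 1]> bf16 yields
+/// 64 * 2 = 128 bytes, and the dynamic f32 stride is scaled by 4 at runtime.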
+// CHECK-LABEL: strides_implicit(
+// CHECK: %[[LOAD_STRIDE_1:.+]] = llvm.mlir.constant(32 : i64) : i64
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[LOAD_STRIDE_1]]
+// CHECK: %[[LOAD_STRIDE_2:.+]] = llvm.mlir.constant(128 : i64) : i64
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[LOAD_STRIDE_2]]
// CHECK: llvm.extractvalue %{{.+}}[4, 0]
-// CHECK: %[[STRIDE_1:.+]] = llvm.mul
-// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STRIDE_1]]
-// CHECK: %[[CST_64_2:.+]] = llvm.mlir.constant(64 : i64) : i64
-// CHECK: llvm.call_intrinsic "llvm.x86.tilestored64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_64_2]]
-// CHECK: %[[CST_128_2:.+]] = llvm.mlir.constant(128 : i64) : i64
-// CHECK: llvm.call_intrinsic "llvm.x86.tilestored64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_128_2]]
-// CHECK: llvm.mlir.constant(2 : i64) : i64
+// CHECK: %[[LOAD_BUF_STRIDE:.+]] = llvm.extractvalue %{{.+}}[4, 0]
+// CHECK: %[[LOAD_STRIDE_SCALE:.+]] = llvm.mlir.constant(4 : i64) : i64
+// CHECK: %[[LOAD_STRIDE_3:.+]] = llvm.mul %[[LOAD_STRIDE_SCALE]], %[[LOAD_BUF_STRIDE]]
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[LOAD_STRIDE_3]]
+// CHECK: %[[STORE_STRIDE_1:.+]] = llvm.mlir.constant(32 : i64) : i64
+// CHECK: llvm.call_intrinsic "llvm.x86.tilestored64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STORE_STRIDE_1]]
+// CHECK: %[[STORE_STRIDE_2:.+]] = llvm.mlir.constant(128 : i64) : i64
+// CHECK: llvm.call_intrinsic "llvm.x86.tilestored64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STORE_STRIDE_2]]
// CHECK: llvm.extractvalue %{{.+}}[4, 0]
-// CHECK: %[[STRIDE_2:.+]] = llvm.mul
-// CHECK: llvm.call_intrinsic "llvm.x86.tilestored64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STRIDE_2]]
-func.func @strides(%arg0: memref<16x32xbf16>, %arg1: memref<16x32xbf16, strided<[64, 1]>>, %arg2: memref<16x32xbf16, strided<[?, 1]>>) {
+// CHECK: %[[STORE_BUF_STRIDE:.+]] = llvm.extractvalue %{{.+}}[4, 0]
+// CHECK: %[[STORE_STRIDE_SCALE:.+]] = llvm.mlir.constant(4 : i64) : i64
+// CHECK: %[[STORE_STRIDE_3:.+]] = llvm.mul %[[STORE_STRIDE_SCALE]], %[[STORE_BUF_STRIDE]]
+// CHECK: llvm.call_intrinsic "llvm.x86.tilestored64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STORE_STRIDE_3]]
+func.func @strides_implicit(%arg0: memref<16x32xi8>,
+ %arg1: memref<32x32xbf16, strided<[64, 1]>>,
+ %arg2: memref<16x32xf32, strided<[?, 1]>>) {
%0 = arith.constant 0 : index
- %1 = amx.tile_load %arg0[%0, %0] : memref<16x32xbf16> into !amx.tile<16x32xbf16>
- %2 = amx.tile_load %arg1[%0, %0] : memref<16x32xbf16, strided<[64, 1]>> into !amx.tile<16x32xbf16>
- %3 = amx.tile_load %arg2[%0, %0] : memref<16x32xbf16, strided<[?, 1]>> into !amx.tile<16x32xbf16>
- amx.tile_store %arg0[%0, %0], %3 : memref<16x32xbf16>, !amx.tile<16x32xbf16>
- amx.tile_store %arg1[%0, %0], %1 : memref<16x32xbf16, strided<[64, 1]>>, !amx.tile<16x32xbf16>
- amx.tile_store %arg2[%0, %0], %2 : memref<16x32xbf16, strided<[?, 1]>>, !amx.tile<16x32xbf16>
+ %1 = amx.tile_load %arg0[%0, %0] : memref<16x32xi8> into !amx.tile<16x32xi8>
+ %2 = amx.tile_load %arg1[%0, %0] : memref<32x32xbf16, strided<[64, 1]>> into !amx.tile<16x32xbf16>
+ %3 = amx.tile_load %arg2[%0, %0] : memref<16x32xf32, strided<[?, 1]>> into !amx.tile<16x16xf32>
+ amx.tile_store %arg0[%0, %0], %1 : memref<16x32xi8>, !amx.tile<16x32xi8>
+ amx.tile_store %arg1[%0, %0], %2 : memref<32x32xbf16, strided<[64, 1]>>, !amx.tile<16x32xbf16>
+ amx.tile_store %arg2[%0, %0], %3 : memref<16x32xf32, strided<[?, 1]>>, !amx.tile<16x16xf32>
+ return
+}
+
+/// Intrinsics require stride in number of bytes.
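+/// Here the explicit stride operand is scaled by the element size in bytes
+/// (1 for i8, 2 for bf16, 4 for f32), as the constants below check.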
+// CHECK-LABEL: strides_explicit(
+// CHECK-SAME: %[[STRIDE:.+]]: index
+// CHECK-DAG: %[[STRIDE_I64:.+]] = builtin.unrealized_conversion_cast %[[STRIDE]] : index to i64
+// CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index
+// CHECK-DAG: %[[C64_I64:.+]] = builtin.unrealized_conversion_cast %[[C64]] : index to i64
+// CHECK: %[[LOAD_STRIDE_SCALE_1:.+]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[LOAD_STRIDE_1:.+]] = llvm.mul %[[LOAD_STRIDE_SCALE_1]], %[[STRIDE_I64]]
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[LOAD_STRIDE_1]]
+// CHECK: %[[LOAD_STRIDE_SCALE_2:.+]] = llvm.mlir.constant(2 : i64) : i64
+// CHECK: %[[LOAD_STRIDE_2:.+]] = llvm.mul %[[LOAD_STRIDE_SCALE_2]], %[[STRIDE_I64]]
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[LOAD_STRIDE_2]]
+// CHECK: %[[LOAD_STRIDE_SCALE_3:.+]] = llvm.mlir.constant(4 : i64) : i64
+// CHECK: %[[LOAD_STRIDE_3:.+]] = llvm.mul %[[LOAD_STRIDE_SCALE_3]], %[[C64_I64]]
+// CHECK: llvm.call_intrinsic "llvm.x86.tileloadd64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[LOAD_STRIDE_3]]
+// CHECK: %[[STORE_STRIDE_SCALE_1:.+]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[STORE_STRIDE_1:.+]] = llvm.mul %[[STORE_STRIDE_SCALE_1]], %[[STRIDE_I64]]
+// CHECK: llvm.call_intrinsic "llvm.x86.tilestored64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STORE_STRIDE_1]]
+// CHECK: %[[STORE_STRIDE_SCALE_2:.+]] = llvm.mlir.constant(2 : i64) : i64
+// CHECK: %[[STORE_STRIDE_2:.+]] = llvm.mul %[[STORE_STRIDE_SCALE_2]], %[[STRIDE_I64]]
+// CHECK: llvm.call_intrinsic "llvm.x86.tilestored64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STORE_STRIDE_2]]
+// CHECK: %[[STORE_STRIDE_SCALE_3:.+]] = llvm.mlir.constant(4 : i64) : i64
+// CHECK: %[[STORE_STRIDE_3:.+]] = llvm.mul %[[STORE_STRIDE_SCALE_3]], %[[C64_I64]]
+// CHECK: llvm.call_intrinsic "llvm.x86.tilestored64.internal"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STORE_STRIDE_3]]
+func.func @strides_explicit(%stride: index,
+ %arg0: memref<?xi8>,
+ %arg1: memref<16x32xbf16>,
+ %arg2: memref<32x32xf32, strided<[64, 1]>>) {
+ %0 = arith.constant 0 : index
+ %c64 = arith.constant 64 : index
+ %1 = amx.tile_load %arg0[%0], %stride : memref<?xi8> into !amx.tile<16x32xi8>
+ %2 = amx.tile_load %arg1[%0, %0], %stride : memref<16x32xbf16> into !amx.tile<16x32xbf16>
+ %3 = amx.tile_load %arg2[%0, %0], %c64 : memref<32x32xf32, strided<[64, 1]>> into !amx.tile<16x16xf32>
+ amx.tile_store %arg0[%0], %1, %stride : memref<?xi8>, !amx.tile<16x32xi8>
+ amx.tile_store %arg1[%0, %0], %2, %stride : memref<16x32xbf16>, !amx.tile<16x32xbf16>
+ amx.tile_store %arg2[%0, %0], %3, %c64 : memref<32x32xf32, strided<[64, 1]>>, !amx.tile<16x16xf32>
return
}
diff --git a/mlir/test/Dialect/AMX/roundtrip.mlir b/mlir/test/Dialect/AMX/roundtrip.mlir
index 1b7f781..3d0f276 100644
--- a/mlir/test/Dialect/AMX/roundtrip.mlir
+++ b/mlir/test/Dialect/AMX/roundtrip.mlir
@@ -1,5 +1,33 @@
// RUN: mlir-opt -verify-diagnostics %s | mlir-opt | FileCheck %s
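+
+// The trailing operand on amx.tile_load / amx.tile_store is an optional
+// explicit stride; the strided<[64, 1]> cases below omit it.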
+// CHECK-LABEL: tloadstore
+// CHECK: %[[x:.*]] = amx.tile_load %{{.*}}[%{{.*}}], %{{.*}} :
+// CHECK-SAME: memref<?xbf16> into !amx.tile<16x32xbf16>
+// CHECK: %[[y:.*]] = amx.tile_load %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} :
+// CHECK-SAME: memref<?x?xbf16> into !amx.tile<16x32xbf16>
+// CHECK: %[[z:.*]] = amx.tile_load %{{.*}}[%{{.*}}, %{{.*}}] :
+// CHECK-SAME: memref<?x?xbf16, strided<[64, 1]>> into !amx.tile<16x32xbf16>
+// CHECK: amx.tile_store %{{.*}}[%{{.*}}], %[[z]], %{{.*}} :
+// CHECK-SAME: memref<?xbf16>, !amx.tile<16x32xbf16>
+// CHECK: amx.tile_store %{{.*}}[%{{.*}}, %{{.*}}], %[[x]], %{{.*}} :
+// CHECK-SAME: memref<?x?xbf16>, !amx.tile<16x32xbf16>
+// CHECK: amx.tile_store %{{.*}}[%{{.*}}, %{{.*}}], %[[y]] :
+// CHECK-SAME: memref<?x?xbf16, strided<[64, 1]>>, !amx.tile<16x32xbf16>
+func.func @tloadstore(%stride: index,
+ %arg0: memref<?xbf16>,
+ %arg1: memref<?x?xbf16>,
+ %arg2: memref<?x?xbf16, strided<[64, 1]>>) {
+ %0 = arith.constant 0 : index
+ %c64 = arith.constant 64 : index
+ %1 = amx.tile_load %arg0[%0], %stride : memref<?xbf16> into !amx.tile<16x32xbf16>
+ %2 = amx.tile_load %arg1[%0, %0], %stride : memref<?x?xbf16> into !amx.tile<16x32xbf16>
+ %3 = amx.tile_load %arg2[%0, %0] : memref<?x?xbf16, strided<[64, 1]>> into !amx.tile<16x32xbf16>
+ amx.tile_store %arg0[%0], %3, %stride : memref<?xbf16>, !amx.tile<16x32xbf16>
+ amx.tile_store %arg1[%0, %0], %1, %stride : memref<?x?xbf16>, !amx.tile<16x32xbf16>
+ amx.tile_store %arg2[%0, %0], %2 : memref<?x?xbf16, strided<[64, 1]>>, !amx.tile<16x32xbf16>
+ return
+}
+
// CHECK-LABEL: tzero
// CHECK: amx.tile_zero : !amx.tile<16x16xbf16>
// CHECK: amx.tile_store %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} : memref<?x?xbf16>, !amx.tile<16x16xbf16>
diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir
index e56079c..1169cd1 100644
--- a/mlir/test/Dialect/Affine/canonicalize.mlir
+++ b/mlir/test/Dialect/Affine/canonicalize.mlir
@@ -2235,6 +2235,136 @@ func.func @affine_leading_zero_no_outer_bound(%arg0: index, %arg1: index) -> ind
// -----
+// CHECK-LABEL: func @delin_apply_cancel_exact
+// CHECK-SAME: (%[[ARG0:.+]]: index, %[[ARG1:.+]]: memref<?xindex>)
+// CHECK-COUNT-6: memref.store %[[ARG0]], %[[ARG1]][%[[ARG0]]]
+// CHECK-NOT: memref.store
+// CHECK: return
+func.func @delin_apply_cancel_exact(%arg0: index, %arg1: memref<?xindex>) {
+ %a:3 = affine.delinearize_index %arg0 into (4, 5) : index, index, index
+ %b:3 = affine.delinearize_index %arg0 into (3, 4, 5) : index, index, index
+ %c:2 = affine.delinearize_index %arg0 into (20) : index, index
+
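+  // Each affine.apply below re-linearizes the delinearized results with the
+  // exact strides of its basis, so every computed index folds back to %arg0.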
+ %t1 = affine.apply affine_map<()[s0, s1, s2] -> (s0 + s1 * 5 + s2 * 20)>()[%a#2, %a#1, %a#0]
+ memref.store %t1, %arg1[%t1] : memref<?xindex>
+
+ %t2 = affine.apply affine_map<()[s0, s1, s2] -> (s0 + s2 * 20 + s1 * 5)>()[%a#2, %a#1, %a#0]
+ memref.store %t2, %arg1[%t2] : memref<?xindex>
+
+ %t3 = affine.apply affine_map<()[s0, s1, s2] -> (s1 * 20 + s2 * 5 + s0)>()[%a#2, %a#0, %a#1]
+ memref.store %t3, %arg1[%t3] : memref<?xindex>
+
+ %t4 = affine.apply affine_map<()[s0, s1, s2] -> (s0 + s1 * 5 + s2 * 20)>()[%b#2, %b#1, %b#0]
+ memref.store %t4, %arg1[%t4] : memref<?xindex>
+
+ %t5 = affine.apply affine_map<()[s0, s1] -> (s0 + s1 * 20)>()[%c#1, %c#0]
+ memref.store %t5, %arg1[%t5] : memref<?xindex>
+
+ %t6 = affine.apply affine_map<()[s0, s1] -> (s1 * 20 + s0)>()[%c#1, %c#0]
+  memref.store %t6, %arg1[%t6] : memref<?xindex>
+
+ return
+}
+
+// -----
+
+// CHECK-LABEL: func @delin_apply_cancel_exact_dim
+// CHECK: affine.for %[[arg1:.+]] = 0 to 256
+// CHECK: memref.store %[[arg1]]
+// CHECK: return
+func.func @delin_apply_cancel_exact_dim(%arg0: memref<?xindex>) {
+ affine.for %arg1 = 0 to 256 {
+ %a:3 = affine.delinearize_index %arg1 into (2, 2, 64) : index, index, index
+ %i = affine.apply affine_map<(d0, d1, d2) -> (d0 + d1 * 128 + d2 * 64)>(%a#2, %a#0, %a#1)
+ memref.store %i, %arg0[%i] : memref<?xindex>
+ }
+ return
+}
+
+// -----
+
+// CHECK-DAG: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 + 512)>
+// CHECK-LABEL: func @delin_apply_cancel_const_term
+// CHECK-SAME: (%[[ARG0:.+]]: index, %[[ARG1:.+]]: memref<?xindex>)
+// CHECK: affine.apply #[[$MAP]]()[%[[ARG0]]]
+// CHECK: return
+func.func @delin_apply_cancel_const_term(%arg0: index, %arg1: memref<?xindex>) {
+ %a:3 = affine.delinearize_index %arg0 into (2, 2, 64) : index, index, index
+
+ %t1 = affine.apply affine_map<()[s0, s1, s2] -> (s0 + s1 * 128 + s2 * 64 + 512)>()[%a#2, %a#0, %a#1]
+ memref.store %t1, %arg1[%t1] : memref<?xindex>
+
+ return
+}
+
+// -----
+
+// CHECK-DAG: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1 + 512)>
+// CHECK-LABEL: func @delin_apply_cancel_var_term
+// CHECK-SAME: (%[[ARG0:.+]]: index, %[[ARG1:.+]]: memref<?xindex>, %[[ARG2:.+]]: index)
+// CHECK: affine.apply #[[$MAP]]()[%[[ARG2]], %[[ARG0]]]
+// CHECK: return
+func.func @delin_apply_cancel_var_term(%arg0: index, %arg1: memref<?xindex>, %arg2: index) {
+ %a:3 = affine.delinearize_index %arg0 into (2, 2, 64) : index, index, index
+
+ %t1 = affine.apply affine_map<()[s0, s1, s2, s3] -> (s0 + s1 * 128 + s2 * 64 + s3 + 512)>()[%a#2, %a#0, %a#1, %arg2]
+ memref.store %t1, %arg1[%t1] : memref<?xindex>
+
+ return
+}
+
+// -----
+
+// CHECK-DAG: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 * 2 + s0 ceildiv 4)>
+// CHECK-LABEL: func @delin_apply_cancel_nested_exprs
+// CHECK-SAME: (%[[ARG0:.+]]: index, %[[ARG1:.+]]: memref<?xindex>)
+// CHECK: affine.apply #[[$MAP]]()[%[[ARG0]]]
+// CHECK: return
+func.func @delin_apply_cancel_nested_exprs(%arg0: index, %arg1: memref<?xindex>) {
+ %a:2 = affine.delinearize_index %arg0 into (20) : index, index
+
+ %t1 = affine.apply affine_map<()[s0, s1] -> ((s0 + s1 * 20) ceildiv 4 + (s1 * 20 + s0) * 2)>()[%a#1, %a#0]
+ memref.store %t1, %arg1[%t1] : memref<?xindex>
+
+ return
+}
+
+// -----
+
+// CHECK-DAG: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
+// CHECK-LABEL: func @delin_apply_cancel_preserve_rotation
+// CHECK-SAME: (%[[ARG0:.+]]: index, %[[ARG1:.+]]: memref<?xindex>)
+// CHECK: %[[A:.+]]:2 = affine.delinearize_index %[[ARG0]] into (20)
+// CHECK: affine.apply #[[$MAP]]()[%[[A]]#1, %[[ARG0]]]
+// CHECK: return
+func.func @delin_apply_cancel_preserve_rotation(%arg0: index, %arg1: memref<?xindex>) {
+ %a:2 = affine.delinearize_index %arg0 into (20) : index, index
+
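+  // s0 + s1 * 20 folds back to %arg0, but the extra s0 term (%a#1) survives,
+  // so the delinearize op is preserved and the result is %a#1 + %arg0.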
+ %t1 = affine.apply affine_map<()[s0, s1] -> (s0 + s1 * 20 + s0)>()[%a#1, %a#0]
+ memref.store %t1, %arg1[%t1] : memref<?xindex>
+
+ return
+}
+
+// -----
+
+// CHECK-DAG: #[[$MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 5)>
+// CHECK-LABEL: func @delin_apply_dont_cancel_partial
+// CHECK-SAME: (%[[ARG0:.+]]: index, %[[ARG1:.+]]: memref<?xindex>)
+// CHECK: %[[A:.+]]:3 = affine.delinearize_index %[[ARG0]] into (3, 4, 5)
+// CHECK: affine.apply #[[$MAP]]()[%[[A]]#2, %[[A]]#1]
+// CHECK: return
+func.func @delin_apply_dont_cancel_partial(%arg0: index, %arg1: memref<?xindex>) {
+ %a:3 = affine.delinearize_index %arg0 into (3, 4, 5) : index, index, index
+
+ %t1 = affine.apply affine_map<()[s0, s1] -> (s0 + s1 * 5)>()[%a#2, %a#1]
+ memref.store %t1, %arg1[%t1] : memref<?xindex>
+
+ return
+}
+
+// -----
+
// CHECK-LABEL: @cst_value_to_cst_attr_basis_delinearize_index
// CHECK-SAME: (%[[ARG0:.*]]: index)
// CHECK: %[[RET:.*]]:3 = affine.delinearize_index %[[ARG0]] into (3, 4, 2) : index, index
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
index c58b153..21b508e 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
@@ -65,13 +65,13 @@ func.func @main(%t: tensor<?xf32>, %sz: index, %idx: index) -> (f32, f32) {
// -----
-func.func @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> {
+func.func private @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> {
func.return %A : tensor<?xf32>
}
-// CHECK-LABEL: func @return_arg
+// CHECK-LABEL: func private @return_arg
// CHECK-SAME: %[[A:.*]]: memref<?xf32
// CHECK-NOT: return %[[A]]
-// NO-DROP-LABEL: func @return_arg
+// NO-DROP-LABEL: func private @return_arg
// NO-DROP-SAME: %[[A:.*]]: memref<?xf32
// NO-DROP: return %[[A]]
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
index 6054a61..d5f834b 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
@@ -171,9 +171,9 @@ func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
// Bufferization of a function that is reading and writing. %t0 is writable, so
// no copy should be inserted.
-// CHECK-LABEL: func @inner_func(
+// CHECK-LABEL: func private @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
+func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-NOT: copy
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
@@ -186,9 +186,9 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
return %0, %1 : tensor<?xf32>, f32
}
-// CHECK-LABEL: func @call_func_with_non_tensor_return(
+// CHECK-LABEL: func private @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @call_func_with_non_tensor_return(
+func.func private @call_func_with_non_tensor_return(
%t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
// CHECK-NOT: alloc
// CHECK-NOT: copy
@@ -203,9 +203,9 @@ func.func @call_func_with_non_tensor_return(
// Bufferization of a function that is reading and writing. %t0 is not writable,
// so a copy is needed.
-// CHECK-LABEL: func @inner_func(
+// CHECK-LABEL: func private @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
+func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-NOT: copy
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
@@ -276,10 +276,10 @@ func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
// This function does not read, just write. We need an alloc, but no copy.
-// CHECK-LABEL: func @does_not_read(
+// CHECK-LABEL: func private @does_not_read(
// CHECK-NOT: alloc
// CHECK-NOT: copy
-func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
+func.func private @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
%f0 = arith.constant 0.0 : f32
%r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
return %r : tensor<?xf32>
@@ -354,9 +354,9 @@ func.func @main() {
// A write inside an scf.execute_region. An equivalent tensor is yielded.
-// CHECK-LABEL: func @execute_region_test(
+// CHECK-LABEL: func private @execute_region_test(
// CHECK-SAME: %[[m1:.*]]: memref<?xf32
-func.func @execute_region_test(%t1 : tensor<?xf32>)
+func.func private @execute_region_test(%t1 : tensor<?xf32>)
-> (f32, tensor<?xf32>, f32)
{
%f1 = arith.constant 0.0 : f32
@@ -397,11 +397,11 @@ func.func @no_inline_execute_region_not_canonicalized() {
// CHECK: func private @some_external_func(memref<?xf32, strided<[?], offset: ?>>)
func.func private @some_external_func(tensor<?xf32>)
-// CHECK: func @scf_for_with_tensor_insert_slice(
+// CHECK: func private @scf_for_with_tensor_insert_slice(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
-func.func @scf_for_with_tensor_insert_slice(
+func.func private @scf_for_with_tensor_insert_slice(
%A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>,
%lb : index, %ub : index, %step : index)
-> (tensor<?xf32>, tensor<?xf32>)
@@ -456,11 +456,11 @@ func.func @bar(
// -----
-// CHECK: func @init_and_dot(
+// CHECK: func private @init_and_dot(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<f32, strided<[], offset: ?>>
-func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
+func.func private @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
// CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32
%v0 = arith.constant 0.0 : f32
@@ -574,9 +574,9 @@ func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i
// No alloc or copy inside of the loop.
-// CHECK-LABEL: func @inner_func(
+// CHECK-LABEL: func private @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
+func.func private @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
// CHECK: memref.store %{{.*}}, %[[arg0]]
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
index e2ab876..b52612d 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
@@ -24,10 +24,46 @@
// CHECK-NOT: copy
// CHECK: %[[call:.*]]:2 = call @inner_func(%[[arg0]])
%0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
- // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32,{{.*}}>
+ // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32{{.*}}>
return %1, %0 : f32, tensor<?xf32>
}
"test.finish" () : () -> ()
}) : () -> ()
+// -----
+#enc1 = #test.tensor_encoding<"hello">
+#enc2 = #test.tensor_encoding<"not hello">
+
+"test.symbol_scope_isolated"() ({
+ // CHECK: func @inner_func(
+ // CHECK-SAME: %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>)
+ // CHECK-SAME: -> memref<?xf32, #test.memref_layout<"hello">>
+ func.func @inner_func(%t: tensor<?xf32, #enc1>)
+ -> tensor<?xf32, #enc1> {
+ // CHECK: return %[[arg0]]
+ return %t : tensor<?xf32, #enc1>
+ }
+
+ // CHECK: func @outer_func(
+ // CHECK-SAME: %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>)
+ // CHECK-SAME: -> (memref<?xf32, #test.memref_layout<"hello">>,
+ // CHECK-SAME: memref<?xf32, #test.memref_layout<"not hello">>)
+ func.func @outer_func(%t0: tensor<?xf32, #enc1>)
+ -> (tensor<?xf32, #enc1>, tensor<?xf32, #enc2>) {
+ // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
+ %0 = call @inner_func(%t0)
+ : (tensor<?xf32, #enc1>) -> (tensor<?xf32, #enc1>)
+
+ // CHECK: %[[local:.*]] = "test.create_memref_op"() : ()
+ // CHECK-SAME: -> memref<?xf32, #test.memref_layout<"not hello">>
+ %local = "test.create_tensor_op"() : () -> tensor<?xf32, #enc2>
+ // CHECK: %[[dummy:.*]] = "test.dummy_memref_op"(%[[local]])
+ %1 = "test.dummy_tensor_op"(%local) : (tensor<?xf32, #enc2>)
+ -> tensor<?xf32, #enc2>
+
+ // CHECK: return %[[call]], %[[dummy]]
+ return %0, %1 : tensor<?xf32, #enc1>, tensor<?xf32, #enc2>
+ }
+ "test.finish" () : () -> ()
+}) : () -> ()
diff --git a/mlir/test/Dialect/LLVMIR/bytecode.mlir b/mlir/test/Dialect/LLVMIR/bytecode.mlir
new file mode 100644
index 0000000..821b0ac
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/bytecode.mlir
@@ -0,0 +1,35 @@
+// RUN: mlir-opt -verify-roundtrip %s
+
+#access_group = #llvm.access_group<id = distinct[0]<>>
+#access_group1 = #llvm.access_group<id = distinct[1]<>>
+#di_subprogram = #llvm.di_subprogram<recId = distinct[2]<>>
+#loc1 = loc("test.f90":12:14)
+#loc2 = loc("test":4:3)
+#loc6 = loc(fused<#di_subprogram>[#loc1])
+#loc7 = loc(fused<#di_subprogram>[#loc2])
+#loop_annotation = #llvm.loop_annotation<disableNonforced = false, mustProgress = true, startLoc = #loc6, endLoc = #loc7, parallelAccesses = #access_group, #access_group1>
+module {
+ llvm.func @imp_fn() {
+ llvm.return loc(#loc2)
+ } loc(#loc8)
+ llvm.func @loop_annotation_with_locs() {
+ llvm.br ^bb1 {loop_annotation = #loop_annotation} loc(#loc4)
+ ^bb1: // pred: ^bb0
+ llvm.return loc(#loc5)
+ } loc(#loc3)
+} loc(#loc)
+#di_file = #llvm.di_file<"test.f90" in "">
+#di_subroutine_type = #llvm.di_subroutine_type<callingConvention = DW_CC_program>
+#loc = loc("test":0:0)
+#loc3 = loc("test-path":36:3)
+#loc4 = loc("test-path":37:5)
+#loc5 = loc("test-path":39:5)
+#di_compile_unit = #llvm.di_compile_unit<id = distinct[3]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full>
+#di_compile_unit1 = #llvm.di_compile_unit<id = distinct[4]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full>
+#di_compile_unit2 = #llvm.di_compile_unit<id = distinct[5]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full>
+#di_module = #llvm.di_module<file = #di_file, scope = #di_compile_unit1, name = "mod1">
+#di_module1 = #llvm.di_module<file = #di_file, scope = #di_compile_unit2, name = "mod2">
+#di_imported_entity = #llvm.di_imported_entity<tag = DW_TAG_imported_module, scope = #di_subprogram, entity = #di_module, file = #di_file, line = 1>
+#di_imported_entity1 = #llvm.di_imported_entity<tag = DW_TAG_imported_module, scope = #di_subprogram, entity = #di_module1, file = #di_file, line = 1>
+#di_subprogram1 = #llvm.di_subprogram<recId = distinct[2]<>, id = distinct[6]<>, compileUnit = #di_compile_unit, scope = #di_file, name = "imp_fn", file = #di_file, subprogramFlags = Definition, type = #di_subroutine_type, retainedNodes = #di_imported_entity, #di_imported_entity1>
+#loc8 = loc(fused<#di_subprogram1>[#loc1])
diff --git a/mlir/test/Dialect/LLVMIR/canonicalize.mlir b/mlir/test/Dialect/LLVMIR/canonicalize.mlir
index 8accf6e..755e3a3 100644
--- a/mlir/test/Dialect/LLVMIR/canonicalize.mlir
+++ b/mlir/test/Dialect/LLVMIR/canonicalize.mlir
@@ -235,6 +235,17 @@ llvm.func @fold_gep_canon(%x : !llvm.ptr) -> !llvm.ptr {
// -----
+// CHECK-LABEL: fold_shufflevector
+// CHECK-SAME: %[[ARG1:[[:alnum:]]+]]: vector<1xf32>, %[[ARG2:[[:alnum:]]+]]: vector<1xf32>
+llvm.func @fold_shufflevector(%v1 : vector<1xf32>, %v2 : vector<1xf32>) -> vector<1xf32> {
+ // CHECK-NOT: llvm.shufflevector
+ %c = llvm.shufflevector %v1, %v2 [0] : vector<1xf32>
+ // CHECK: llvm.return %[[ARG1]]
+ llvm.return %c : vector<1xf32>
+}
+
+// -----
+
// Check that LLVM constants participate in cross-dialect constant folding. The
// resulting constant is created in the arith dialect because the last folded
// operation belongs to it.
diff --git a/mlir/test/Dialect/LLVMIR/debuginfo.mlir b/mlir/test/Dialect/LLVMIR/debuginfo.mlir
index 1834b0a..d7bf99b 100644
--- a/mlir/test/Dialect/LLVMIR/debuginfo.mlir
+++ b/mlir/test/Dialect/LLVMIR/debuginfo.mlir
@@ -1,4 +1,5 @@
// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s
// CHECK-DAG: #[[FILE:.*]] = #llvm.di_file<"debuginfo.mlir" in "/test/">
#file = #llvm.di_file<"debuginfo.mlir" in "/test/">
diff --git a/mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir b/mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir
new file mode 100644
index 0000000..7fd97ef
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/inlining-rocdl.mlir
@@ -0,0 +1,14 @@
+// RUN: mlir-opt %s --inline | FileCheck %s
+
+llvm.func @threadidx() -> i32 {
+ %tid = rocdl.workitem.id.x : i32
+ llvm.return %tid : i32
+}
+
+// CHECK-LABEL: func @caller
+llvm.func @caller() -> i32 {
+ // CHECK-NOT: llvm.call @threadidx
+ // CHECK: rocdl.workitem.id.x
+ %z = llvm.call @threadidx() : () -> (i32)
+ llvm.return %z : i32
+}
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 358bd33..242c04f 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -1035,6 +1035,20 @@ llvm.func @rocdl.s.wait.expcnt() {
llvm.return
}
+llvm.func @rocdl.s.wait.asynccnt() {
+ // CHECK-LABEL: rocdl.s.wait.asynccnt
+ // CHECK: rocdl.s.wait.asynccnt 0
+ rocdl.s.wait.asynccnt 0
+ llvm.return
+}
+
+llvm.func @rocdl.s.wait.tensorcnt() {
+ // CHECK-LABEL: rocdl.s.wait.tensorcnt
+ // CHECK: rocdl.s.wait.tensorcnt 0
+ rocdl.s.wait.tensorcnt 0
+ llvm.return
+}
+
// -----
llvm.func @rocdl.readfirstlane(%src : f32) -> f32 {
diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
index 7344797..00e763a 100644
--- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir
+++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt -verify-roundtrip %s
// CHECK-LABEL: func @baz
@@ -757,7 +757,7 @@ llvm.func @stackrestore(%arg0: !llvm.ptr) {
// CHECK-LABEL: @experimental_noalias_scope_decl
llvm.func @experimental_noalias_scope_decl() {
- // CHECK: llvm.intr.experimental.noalias.scope.decl #{{.*}}
+ // CHECK: llvm.intr.experimental.noalias.scope.decl #alias_scope{{.*}}
llvm.intr.experimental.noalias.scope.decl #alias_scope
llvm.return
}
@@ -767,7 +767,7 @@ llvm.func @experimental_noalias_scope_decl() {
// CHECK-LABEL: @experimental_noalias_scope_with_string_id
llvm.func @experimental_noalias_scope_with_string_id() {
- // CHECK: llvm.intr.experimental.noalias.scope.decl #{{.*}}
+ // CHECK: llvm.intr.experimental.noalias.scope.decl #alias_scope{{.*}}
llvm.intr.experimental.noalias.scope.decl #alias_scope2
llvm.return
}
diff --git a/mlir/test/Dialect/Linalg/decompose-pack.mlir b/mlir/test/Dialect/Linalg/decompose-pack.mlir
index 18a09f4..12292ee 100644
--- a/mlir/test/Dialect/Linalg/decompose-pack.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-pack.mlir
@@ -31,6 +31,25 @@ func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<?x?xi32>, %arg1: tensor<1x1x?x1xi
// -----
+func.func @NCHW_to_NCHWc(%src: tensor<2x32x16x8xf32>, %dest: tensor<2x1x16x8x32xf32>) -> tensor<2x1x16x8x32xf32> {
+ %pack = linalg.pack %src
+ inner_dims_pos = [1]
+ inner_tiles = [32] into %dest
+ : tensor<2x32x16x8xf32> -> tensor<2x1x16x8x32xf32>
+ return %pack : tensor<2x1x16x8x32xf32>
+}
+// CHECK-LABEL: func.func @NCHW_to_NCHWc(
+// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
+// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
+// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<2x16x8x32xf32>
+// CHECK: %[[TR:.*]] = linalg.transpose ins(%[[SRC]] : tensor<2x32x16x8xf32>) outs(%[[INIT]] : tensor<2x16x8x32xf32>) permutation = [0, 2, 3, 1]
+// CHECK: %[[RES:.*]] = tensor.insert_slice %[[TR]] into %[[DEST]]
+// CHECK-SAME: [0, 0, 0, 0, 0] [2, 1, 16, 8, 32] [1, 1, 1, 1, 1]
+// CHECK-SAME: : tensor<2x16x8x32xf32> into tensor<2x1x16x8x32xf32>
+// CHECK: return %[[RES]] : tensor<2x1x16x8x32xf32>
+
+// -----
+
func.func @simple_pad_and_pack_static_tiles(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2xf32>, %pad: f32) -> tensor<1x1x8x2xf32> {
%0 = linalg.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32>
return %0 : tensor<1x1x8x2xf32>
@@ -157,6 +176,8 @@ func.func @simple_pad_and_pack_dynamic_tiles(%input: tensor<5x1xf32>, %output: t
// -----
+// Note: the un-tiled outer dims are permuted. However, these are unit dims, which is supported.
+
func.func @simple_pad_and_pack_dynamic_tile_not_all_dims_tiled(%input: tensor<1x1x5x1xf32>, %output: tensor<1x1x1x1x2x?xf32>, %pad: f32, %high: index) -> tensor<1x1x1x1x2x?xf32> {
%0 = linalg.pack %input padding_value(%pad : f32) outer_dims_perm = [1, 0, 2, 3] inner_dims_pos = [3, 2] inner_tiles = [2, %high] into %output : tensor<1x1x5x1xf32> -> tensor<1x1x1x1x2x?xf32>
return %0 : tensor<1x1x1x1x2x?xf32>
@@ -182,6 +203,28 @@ func.func @simple_pad_and_pack_dynamic_tile_not_all_dims_tiled(%input: tensor<1x
// -----
+// Similar to the example above, but one of the permuted un-tiled outer dims is non-unit: (7, 1) -> (1, 7).
+
+func.func @negative_not_all_dims_tiled_outer_dim_0_permuted(%input: tensor<7x1x5x1xf32>, %output: tensor<1x7x1x1x2x?xf32>, %pad: f32, %high: index) -> tensor<1x7x1x1x2x?xf32> {
+ %0 = linalg.pack %input padding_value(%pad : f32) outer_dims_perm = [1, 0, 2, 3] inner_dims_pos = [3, 2] inner_tiles = [2, %high] into %output : tensor<7x1x5x1xf32> -> tensor<1x7x1x1x2x?xf32>
+ return %0 : tensor<1x7x1x1x2x?xf32>
+}
+// CHECK-LABEL: func.func @negative_not_all_dims_tiled_outer_dim_0_permuted
+// CHECK: linalg.pack
+
+// -----
+
+// Similar to the example above, but one of the permuted un-tiled outer dims is non-unit: (1, 7) -> (7, 1).
+
+func.func @negative_not_all_dims_tiled_outer_dim_1_permuted(%input: tensor<1x7x5x1xf32>, %output: tensor<7x1x1x1x2x?xf32>, %pad: f32, %high: index) -> tensor<7x1x1x1x2x?xf32> {
+ %0 = linalg.pack %input padding_value(%pad : f32) outer_dims_perm = [1, 0, 2, 3] inner_dims_pos = [3, 2] inner_tiles = [2, %high] into %output : tensor<1x7x5x1xf32> -> tensor<7x1x1x1x2x?xf32>
+ return %0 : tensor<7x1x1x1x2x?xf32>
+}
+// CHECK-LABEL: func.func @negative_not_all_dims_tiled_outer_dim_1_permuted
+// CHECK: linalg.pack
+
+// -----
+
func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32>{
%0 = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32>
return %0 : tensor<1x1x32x8xf32>
@@ -295,3 +338,21 @@ func.func @pack_with_non_adjacent_and_non_permuted_inner_dims(%arg0: tensor<8x1x
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 8, 1] [1, 1, 1, 1, 1, 1] : tensor<1x1x8x1xf32> into tensor<1x1x1x1x8x1xf32>
// CHECK: return %[[INSERT]]
+
+// -----
+
+/// Note "126", which is a non-unit tiled-outer-dim. This is not supported.
+
+func.func @negative_non_unit_tiled_outer_dim(%dest: tensor<1x126x1x1x8xf32>, %src: tensor<1x1x1x1001xf32>, %pad: f32) -> tensor<1x126x1x1x8xf32> {
+ %pack = linalg.pack %src
+ padding_value(%pad : f32)
+ outer_dims_perm = [0, 3, 2, 1]
+ inner_dims_pos = [3]
+ inner_tiles = [8]
+ into %dest
+ : tensor<1x1x1x1001xf32> -> tensor<1x126x1x1x8xf32>
+
+ return %pack : tensor<1x126x1x1x8xf32>
+}
+// CHECK-LABEL: @negative_non_unit_tiled_outer_dim(
+// CHECK: linalg.pack
diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
index 618ba34..66cae5c 100644
--- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
+++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
@@ -1011,6 +1011,20 @@ module attributes { transform.target_tag = "start_here" } {
} -> tensor<1x1x4xf32>
return
}
+
+ func.func @generic_none(%arg0: tensor<128x128xi32>, %arg1: tensor<128x128xi32>, %arg2: tensor<128x128xi32>) {
+ %0 = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
+ affine_map<(d0, d1, d2) -> (d2, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%arg0, %arg1 : tensor<128x128xi32>, tensor<128x128xi32>)
+ outs(%arg2 : tensor<128x128xi32>) {
+ ^bb0(%in: i32, %in_0: i32, %out: i32):
+ linalg.yield %out : i32
+ } -> tensor<128x128xi32>
+ return
+ }
}
// -----
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
index 9616a3e..1df15e8 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -10,10 +10,10 @@
// TODO: Some test cases from this file should be moved to other dialects.
-// CHECK-LABEL: func @fill_inplace(
+// CHECK-LABEL: func private @fill_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
-// CHECK-NO-LAYOUT-MAP-LABEL: func @fill_inplace(%{{.*}}: memref<?xf32>) {
-func.func @fill_inplace(
+// CHECK-NO-LAYOUT-MAP-LABEL: func private @fill_inplace(%{{.*}}: memref<?xf32>) {
+func.func private @fill_inplace(
%A : tensor<?xf32> {bufferization.writable = true})
-> tensor<?xf32>
{
@@ -56,10 +56,10 @@ func.func @not_inplace(
// -----
-// CHECK-LABEL: func @not_inplace
+// CHECK-LABEL: func private @not_inplace
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?x?xf32, strided<[?, ?], offset: ?>>) {
-// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?x?xf32>) {
-func.func @not_inplace(
+// CHECK-NO-LAYOUT-MAP-LABEL: func private @not_inplace(%{{.*}}: memref<?x?xf32>) {
+func.func private @not_inplace(
%A : tensor<?x?xf32> {bufferization.writable = true})
-> tensor<?x?xf32>
{
@@ -235,7 +235,7 @@ func.func @dominance_violation_bug_1(
// -----
-func.func @gather_like(
+func.func private @gather_like(
%arg0 : tensor<?x?xf32> {bufferization.writable = false},
%arg1 : tensor<?xi32> {bufferization.writable = false},
%arg2 : tensor<?x?xf32> {bufferization.writable = true})
@@ -254,7 +254,7 @@ func.func @gather_like(
} -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: func @gather_like(
+// CHECK-LABEL: func private @gather_like(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?xf32,
// CHECK-SAME: %[[ARG1:.+]]: memref<?xi32
// CHECK-SAME: %[[ARG2:.+]]: memref<?x?xf32
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
index 9a44f95..7dc0a87b 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -18,7 +18,7 @@ func.func @fuse_unary(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1]}
+ %1, %loops:2 = transform.structured.fuse %0 tile_sizes [32, 32] interchange [0, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -48,7 +48,7 @@ func.func @fuse_unary(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1]}
+ %1, %loops:2 = transform.structured.fuse %0 tile_sizes [32, 32] interchange [0, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
transform.loop.peel %loops#0 : (!transform.op<"scf.for">) -> (!transform.any_op, !transform.any_op)
transform.yield
@@ -57,6 +57,60 @@ module attributes {transform.with_named_sequence} {
// -----
+// CHECK-LABEL: func.func @fuse_unary_param
+func.func @fuse_unary_param(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
+
+ // CHECK: %[[RES:.*]] = scf.for
+ // CHECK: scf.for
+ // CHECK: linalg.exp
+ // CHECK: linalg.add
+ // CHECK: return %[[RES]]
+ %0 = linalg.exp ins(%arg0 : tensor<?x?xf32>)
+ outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
+ %1 = linalg.add ins(%0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
+ return %1 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.param.constant 32 : i32 -> !transform.param<i32>
+ %2 = transform.param.constant 1 : i32 -> !transform.param<i32>
+ %3, %loops:2 = transform.structured.fuse %0 tile_sizes [%1, 32] interchange [0, %2]
+ : (!transform.any_op, !transform.param<i32>, !transform.param<i32>) ->
+ (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-LABEL: func.func @fuse_unary_forall
+func.func @fuse_unary_forall(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
+
+ // CHECK: %[[RES:.*]] = scf.forall
+ // CHECK: linalg.exp
+ // CHECK: linalg.add
+ // CHECK: return %[[RES]]
+ %0 = linalg.exp ins(%arg0 : tensor<?x?xf32>)
+ outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
+ %1 = linalg.add ins(%0, %arg0 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%arg1: tensor<?x?xf32>) -> tensor<?x?xf32>
+ return %1 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1, %loop = transform.structured.fuse %0 tile_sizes [32, 32] {use_forall}
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
// CHECK-LABEL: func.func @interchange_reduction
// CHECK-SAME: (%[[INPUT:.+]]: tensor<12x7x25xf32>)
func.func @interchange_reduction(%input: tensor<12x7x25xf32>) -> tensor<12x25xf32> {
@@ -93,7 +147,7 @@ func.func @interchange_reduction(%input: tensor<12x7x25xf32>) -> tensor<12x25xf3
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [5, 0, 7], tile_interchange = [0, 2, 1]}
+ %1, %loops:2 = transform.structured.fuse %0 tile_sizes [5, 0, 7] interchange [0, 2, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%2, %loops_2 = transform.structured.tile_using_for %1 tile_sizes [0, 4]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
@@ -121,7 +175,7 @@ func.func @unpack_elemwise(%arg0: tensor<16x48x8x8xf32>, %arg1: tensor<128x384xf
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [16, 32], tile_interchange = [0, 1]}
+ %1, %loops:2 = transform.structured.fuse %0 tile_sizes [16, 32] interchange [0, 1]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -147,7 +201,7 @@ func.func @pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x8x8xf32
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [3, 5, 0, 0]}
+ %1, %loops:2 = transform.structured.fuse %0 tile_sizes [3, 5, 0, 0]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -173,7 +227,7 @@ func.func @nofuse_pack_elemwise(%arg0: tensor<128x384xf32>, %arg1: tensor<16x48x
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:3 = transform.structured.fuse %0 {tile_sizes = [3, 5, 2, 0]}
+ %1, %loops:3 = transform.structured.fuse %0 tile_sizes [3, 5, 2, 0]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -204,7 +258,7 @@ func.func @fuse_through_slice(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) ->
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1], apply_cleanup = true}
+ %1, %loops:2 = transform.structured.fuse %0 tile_sizes [32, 32] interchange [0, 1] {apply_cleanup}
: (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
transform.yield
}
@@ -238,7 +292,7 @@ func.func @fuse_through_slice_and_cast_chain(%arg0: tensor<100x100xf32>, %arg1:
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1], apply_cleanup = true}
+ %1, %loops:2 = transform.structured.fuse %0 tile_sizes [32, 32] interchange [0, 1] {apply_cleanup}
: (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
transform.yield
}
@@ -273,7 +327,7 @@ func.func @fuse_unrelated_slices(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>)
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.add"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1, %loops:2 = transform.structured.fuse %0 {tile_sizes = [32, 32], tile_interchange = [0, 1], apply_cleanup = true}
+ %1, %loops:2 = transform.structured.fuse %0 tile_sizes [32, 32] interchange [0, 1] {apply_cleanup}
: (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
transform.yield
}
@@ -299,7 +353,7 @@ func.func @bubble_up_extract_slice_through_expand_shape(%0: tensor<60xf32>) -> t
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %transformed, %loops:3 = transform.structured.fuse %0 [1, 1, 5] interchange [0, 1, 2] apply_cleanup = true :
+ %transformed, %loops:3 = transform.structured.fuse %0 tile_sizes [1, 1, 5] interchange [0, 1, 2] {apply_cleanup} :
(!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -324,7 +378,7 @@ func.func @bubble_up_extract_slice_through_expand_shape_full_inner_dim(%0: tenso
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %transformed, %loops:2 = transform.structured.fuse %0 [1, 2, 0] interchange [0, 1, 2] apply_cleanup = true :
+ %transformed, %loops:2 = transform.structured.fuse %0 tile_sizes [1, 2, 0] interchange [0, 1, 2] {apply_cleanup} :
(!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op)
transform.yield
}
@@ -348,7 +402,7 @@ func.func @no_bubble_up_extract_slice_through_expand_shape_non_contiguous(%0: te
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %transformed, %loops:3 = transform.structured.fuse %0 [1, 2, 5] interchange [0, 1, 2] apply_cleanup = true :
+ %transformed, %loops:3 = transform.structured.fuse %0 tile_sizes [1, 2, 5] interchange [0, 1, 2] {apply_cleanup} :
(!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -379,7 +433,7 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %transformed, %loops:4 = transform.structured.fuse %0 [1, 2, 0, 1, 4] interchange [0, 1, 2, 3, 4] apply_cleanup = true :
+ %transformed, %loops:4 = transform.structured.fuse %0 tile_sizes [1, 2, 0, 1, 4] interchange [0, 1, 2, 3, 4] {apply_cleanup} :
(!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -408,7 +462,7 @@ module {
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %transformed, %loops:1 = transform.structured.fuse %0 [0, 0, 1, 0] interchange [0, 1, 2, 3] apply_cleanup = true :
+ %transformed, %loops:1 = transform.structured.fuse %0 tile_sizes [0, 0, 1, 0] interchange [0, 1, 2, 3] {apply_cleanup} :
(!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">)
transform.yield
}
@@ -433,7 +487,7 @@ func.func @no_bubble_up_extract_slice_through_expand_shape_on_cleanup_false(%0:
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %transformed, %loops:3 = transform.structured.fuse %0 [1, 1, 5] interchange [0, 1, 2] apply_cleanup = false :
+ %transformed, %loops:3 = transform.structured.fuse %0 tile_sizes [1, 1, 5] interchange [0, 1, 2] :
(!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -456,7 +510,7 @@ func.func @bubble_up_extract_slice_through_collapse_shape(%0: tensor<1x8x1800x32
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %transformed, %loops:1 = transform.structured.fuse %0 [1, 0, 0] interchange [0, 1, 2] apply_cleanup = true :
+ %transformed, %loops:1 = transform.structured.fuse %0 tile_sizes [1, 0, 0] interchange [0, 1, 2] {apply_cleanup} :
(!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">)
transform.yield
}
@@ -482,7 +536,7 @@ func.func @bubble_up_extract_slice_through_collapse_shape_with_collapse_producer
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
%0 = transform.structured.match ops{["linalg.exp"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- %transformed, %loops:1 = transform.structured.fuse %0 [1, 0, 0] interchange [0, 1, 2] apply_cleanup = true :
+ %transformed, %loops:1 = transform.structured.fuse %0 tile_sizes [1, 0, 0] interchange [0, 1, 2] {apply_cleanup} :
(!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">)
transform.yield
}
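For context, the hunks above migrate transform.structured.fuse from its old attribute-based options to the dedicated keyword syntax; a minimal before/after sketch (operand names are illustrative):

  // Old form: options carried in the attribute dictionary.
  %tiled, %loops:2 = transform.structured.fuse %op {tile_sizes = [32, 32], tile_interchange = [0, 1], apply_cleanup = true}
    : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)

  // New form: tile_sizes/interchange keywords, with apply_cleanup as a unit attribute.
  %tiled, %loops:2 = transform.structured.fuse %op tile_sizes [32, 32] interchange [0, 1] {apply_cleanup}
    : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)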
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
index 35f520a..93a0336 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
@@ -1,5 +1,9 @@
// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.dot
+///----------------------------------------------------------------------------------------
+
// CHECK-LABEL: contraction_dot
func.func @contraction_dot(%A: memref<1584xf32>, %B: memref<1584xf32>, %C: memref<f32>) {
@@ -20,6 +24,10 @@ module attributes {transform.with_named_sequence} {
// -----
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.matvec
+///----------------------------------------------------------------------------------------
+
// CHECK-LABEL: contraction_matvec
func.func @contraction_matvec(%A: memref<1584x1584xf32>, %B: memref<1584xf32>, %C: memref<1584xf32>) {
@@ -41,6 +49,10 @@ module attributes {transform.with_named_sequence} {
// -----
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.matmul
+///----------------------------------------------------------------------------------------
+
// CHECK-LABEL: contraction_matmul
func.func @contraction_matmul(%A: memref<1584x1584xf32>, %B: memref<1584x1584xf32>, %C: memref<1584x1584xf32>) {
// CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584x1584xf32>
@@ -138,6 +150,10 @@ module attributes {transform.with_named_sequence} {
// -----
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.batch_matmul
+///----------------------------------------------------------------------------------------
+
// CHECK-LABEL: contraction_batch_matmul
func.func @contraction_batch_matmul(%A: memref<1584x1584x1584xf32>, %B: memref<1584x1584x1584xf32>, %C: memref<1584x1584x1584xf32>) {
// CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584x1584x1584xf32>
@@ -159,6 +175,10 @@ module attributes {transform.with_named_sequence} {
// -----
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.contract
+///----------------------------------------------------------------------------------------
+
// CHECK-LABEL: @matmul_as_contract
// CHECK-SAME: %[[A:.*]]: tensor<24x12xf32>
// CHECK-SAME: %[[B:.*]]: tensor<12x25xf32>
@@ -220,6 +240,10 @@ module attributes {transform.with_named_sequence} {
// -----
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.fill
+///----------------------------------------------------------------------------------------
+
// CHECK-LABEL: func @test_vectorize_fill
func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) {
// CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32>
@@ -259,70 +283,14 @@ module attributes {transform.with_named_sequence} {
// -----
-// CHECK-LABEL: func @test_vectorize_copy
-func.func @test_vectorize_copy(%A : memref<8x16xf32>, %B : memref<8x16xf32>) {
- // CHECK: %[[V:.*]] = vector.transfer_read {{.*}} : memref<8x16xf32>, vector<8x16xf32>
- // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32>
- memref.copy %A, %B : memref<8x16xf32> to memref<8x16xf32>
- return
-}
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.pack
+///----------------------------------------------------------------------------------------
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
- %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
- transform.yield
- }
-}
+// Note: see a similar test in:
+// * vectorization.mlir
-// -----
-
-// CHECK-LABEL: func @test_vectorize_copy_0d
-func.func @test_vectorize_copy_0d(%A : memref<f32>, %B : memref<f32>) {
- // CHECK-SAME: (%[[A:.*]]: memref<f32>, %[[B:.*]]: memref<f32>)
- // CHECK: %[[V:.*]] = vector.transfer_read %[[A]][]{{.*}} : memref<f32>, vector<f32>
- // CHECK: %[[val:.*]] = vector.extract %[[V]][] : f32 from vector<f32>
- // CHECK: %[[VV:.*]] = vector.broadcast %[[val]] : f32 to vector<f32>
- // CHECK: vector.transfer_write %[[VV]], %[[B]][] : vector<f32>, memref<f32>
- memref.copy %A, %B : memref<f32> to memref<f32>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
- %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: func @test_vectorize_copy_complex
-// CHECK-NOT: vector<
-func.func @test_vectorize_copy_complex(%A : memref<8x16xcomplex<f32>>, %B : memref<8x16xcomplex<f32>>) {
- memref.copy %A, %B : memref<8x16xcomplex<f32>> to memref<8x16xcomplex<f32>>
- return
-}
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
- %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// Input identical as the test in vectorization.mlir. Output is different -
-// vector sizes are inferred (rather than user-specified) and hence _no_
-// masking was used.
-
-func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
+func.func @pack_no_padding(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
%pack = linalg.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
return %pack : tensor<4x1x32x16x2xf32>
}
@@ -336,7 +304,7 @@ module attributes {transform.with_named_sequence} {
}
}
-// CHECK-LABEL: func.func @test_vectorize_pack(
+// CHECK-LABEL: func.func @pack_no_padding(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x8x16xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
// CHECK-DAG: %[[VAL_2:.*]] = ub.poison : f32
@@ -349,13 +317,16 @@ module attributes {transform.with_named_sequence} {
// -----
-func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
+// Note: see a similar test in:
+// * vectorization.mlir
+
+func.func @pack_with_padding(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
%pad = arith.constant 0.000000e+00 : f32
%pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
return %pack : tensor<32x4x1x16x2xf32>
}
-// CHECK-LABEL: func.func @test_vectorize_padded_pack(
+// CHECK-LABEL: func.func @pack_with_padding(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x7x15xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32
@@ -377,6 +348,10 @@ module attributes {transform.with_named_sequence} {
// -----
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.map
+///----------------------------------------------------------------------------------------
+
func.func @vectorize_map(%arg0: memref<64xf32>,
%arg1: memref<64xf32>, %arg2: memref<64xf32>) {
linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>)
@@ -403,6 +378,10 @@ module attributes {transform.with_named_sequence} {
// -----
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.transpose
+///----------------------------------------------------------------------------------------
+
func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>,
%arg1: memref<32x64x16xf32>) {
linalg.transpose ins(%arg0 : memref<16x32x64xf32>)
@@ -424,6 +403,10 @@ module attributes {transform.with_named_sequence} {
// -----
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.reduce
+///----------------------------------------------------------------------------------------
+
func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>,
%arg1: memref<16x64xf32>) {
linalg.reduce ins(%arg0 : memref<16x32x64xf32>)
@@ -449,6 +432,10 @@ module attributes {transform.with_named_sequence} {
// -----
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.generic
+///----------------------------------------------------------------------------------------
+
#matmul_trait = {
indexing_maps = [
affine_map<(m, n, k) -> (m, k)>,
@@ -1446,6 +1433,8 @@ module attributes {transform.with_named_sequence} {
// -----
+// TODO: Two Linalg Ops in one test - either split it or document why.
+
// CHECK-DAG: #[[$M6:.*]] = affine_map<(d0, d1) -> (d0, 0)>
// CHECK-LABEL: func @fused_broadcast_red_2d
@@ -1896,3 +1885,65 @@ module attributes {transform.with_named_sequence} {
}
}
+// -----
+
+///----------------------------------------------------------------------------------------
+/// Tests for memref.copy
+///----------------------------------------------------------------------------------------
+
+// CHECK-LABEL: func @test_vectorize_copy
+func.func @test_vectorize_copy(%A : memref<8x16xf32>, %B : memref<8x16xf32>) {
+ // CHECK: %[[V:.*]] = vector.transfer_read {{.*}} : memref<8x16xf32>, vector<8x16xf32>
+ // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32>
+ memref.copy %A, %B : memref<8x16xf32> to memref<8x16xf32>
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+ %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-LABEL: func @test_vectorize_copy_0d
+func.func @test_vectorize_copy_0d(%A : memref<f32>, %B : memref<f32>) {
+ // CHECK-SAME: (%[[A:.*]]: memref<f32>, %[[B:.*]]: memref<f32>)
+ // CHECK: %[[V:.*]] = vector.transfer_read %[[A]][]{{.*}} : memref<f32>, vector<f32>
+ // CHECK: %[[val:.*]] = vector.extract %[[V]][] : f32 from vector<f32>
+ // CHECK: %[[VV:.*]] = vector.broadcast %[[val]] : f32 to vector<f32>
+ // CHECK: vector.transfer_write %[[VV]], %[[B]][] : vector<f32>, memref<f32>
+ memref.copy %A, %B : memref<f32> to memref<f32>
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+ %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-LABEL: func @test_vectorize_copy_complex
+// CHECK-NOT: vector<
+func.func @test_vectorize_copy_complex(%A : memref<8x16xcomplex<f32>>, %B : memref<8x16xcomplex<f32>>) {
+ memref.copy %A, %B : memref<8x16xcomplex<f32>> to memref<8x16xcomplex<f32>>
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+ %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+ transform.yield
+ }
+}
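A quick shape check for the pack tests above (a worked example, not part of the test file): with inner_dims_pos = [2, 1] and inner_tiles = [16, 2], a tensor<32x7x15xf32> source packs into tensor<32x4x1x16x2xf32> because ceildiv(15, 16) = 1 and ceildiv(7, 2) = 4. Since 7 < 4 * 2 and 15 < 1 * 16, the source must be padded; that is why pack_with_padding needs a padding_value, while pack_no_padding (8 = 4 * 2, 16 = 1 * 16) does not.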
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
index 11bea8d..1304a90 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
@@ -1307,14 +1307,17 @@ func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf
/// Tests for linalg.pack
///----------------------------------------------------------------------------------------
-// Input identical as the test in vectorization-with-patterns.mlir. Output is
-// different - vector sizes are inferred (rather than user-specified) and hence
-// masking was used.
+// This packing requires no padding, so there are no out-of-bounds read/write vector Ops.
-// CHECK-LABEL: func @test_vectorize_pack
+// Note: see a similar test in:
+// * vectorization-with-patterns.mlir
+// The output is identical (the user-specified vector sizes match the inferred
+// vector sizes, i.e. the tensor sizes).
+
+// CHECK-LABEL: func @pack_no_padding
// CHECK-SAME: %[[SRC:.*]]: tensor<32x8x16xf32>,
// CHECK-SAME: %[[DEST:.*]]: tensor<4x1x32x16x2xf32>
-func.func @test_vectorize_pack(%src: tensor<32x8x16xf32>, %dest: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
+func.func @pack_no_padding(%src: tensor<32x8x16xf32>, %dest: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> {
%pack = linalg.pack %src outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %dest : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32>
return %pack : tensor<4x1x32x16x2xf32>
}
@@ -1325,9 +1328,9 @@ func.func @test_vectorize_pack(%src: tensor<32x8x16xf32>, %dest: tensor<4x1x32x1
// CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
// CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [1, 3, 0, 4, 2] : vector<32x4x2x1x16xf32> to vector<4x1x32x16x2xf32>
// CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
-// CHECK: %[[write:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
+// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<4x1x32x16x2xf32>, tensor<4x1x32x16x2xf32>
-// CHECK: return %[[write]] : tensor<4x1x32x16x2xf32>
+// CHECK: return %[[WRITE]] : tensor<4x1x32x16x2xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%src: !transform.any_op {transform.readonly}) {
@@ -1339,14 +1342,18 @@ module attributes {transform.with_named_sequence} {
// -----
-// Input identical as the test in vectorization-with-patterns.mlir. Output is
-// different - vector sizes are inferred (rather than user-specified) and hence
-// masking was used.
+// This packing does require padding, so there are out-of-bounds read/write
+// vector Ops.
+
+// Note: see a similar test in:
+// * vectorization-with-patterns.mlir
+// The output is different (the user-specified vector sizes differ from the
+// inferred vector sizes, i.e. the tensor sizes).
-// CHECK-LABEL: func @test_vectorize_padded_pack
+// CHECK-LABEL: func @pack_with_padding
// CHECK-SAME: %[[SRC:.*]]: tensor<32x7x15xf32>,
// CHECK-SAME: %[[DEST:.*]]: tensor<32x4x1x16x2xf32>
-func.func @test_vectorize_padded_pack(%src: tensor<32x7x15xf32>, %dest: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
+func.func @pack_with_padding(%src: tensor<32x7x15xf32>, %dest: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
%pad = arith.constant 0.000000e+00 : f32
%pack = linalg.pack %src padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %dest : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
return %pack : tensor<32x4x1x16x2xf32>
@@ -1364,9 +1371,9 @@ func.func @test_vectorize_padded_pack(%src: tensor<32x7x15xf32>, %dest: tensor<3
// CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
// CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
// CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
-// CHECK: %[[write:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
+// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
-// CHECK: return %[[write]] : tensor<32x4x1x16x2xf32>
+// CHECK: return %[[WRITE]] : tensor<32x4x1x16x2xf32>
module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
@@ -1378,10 +1385,46 @@ module attributes {transform.with_named_sequence} {
// -----
-// CHECK-LABEL: func @test_vectorize_dynamic_pack
+// This packing does require padding, so there are out-of-bounds read/write
+// vector Ops.
+
+// Note: see a similar test in:
+// * vectorization-with-patterns.mlir
+// The output is identical (in both cases the vector sizes are inferred).
+
+// CHECK-LABEL: func @pack_with_padding_no_vector_sizes
+// CHECK-SAME: %[[SRC:.*]]: tensor<32x7x15xf32>,
+// CHECK-SAME: %[[DEST:.*]]: tensor<32x4x1x16x2xf32>
+func.func @pack_with_padding_no_vector_sizes(%src: tensor<32x7x15xf32>, %dest: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
+ %pad = arith.constant 0.000000e+00 : f32
+ %pack = linalg.pack %src padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %dest : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
+ return %pack : tensor<32x4x1x16x2xf32>
+}
+// CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[CST]]
+// CHECK-SAME: {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
+// CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
+// CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
+// CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
+// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
+// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
+// CHECK: return %[[WRITE]] : tensor<32x4x1x16x2xf32>
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 : !transform.any_op
+ transform.yield
+ }
+}
+
+// -----
+
+// CHECK-LABEL: func @pack_with_dynamic_dims
// CHECK-SAME: %[[SRC:.*]]: tensor<?x?xf32>,
// CHECK-SAME: %[[DEST:.*]]: tensor<?x?x16x2xf32>
-func.func @test_vectorize_dynamic_pack(%src: tensor<?x?xf32>, %dest: tensor<?x?x16x2xf32>) -> tensor<?x?x16x2xf32> {
+func.func @pack_with_dynamic_dims(%src: tensor<?x?xf32>, %dest: tensor<?x?x16x2xf32>) -> tensor<?x?x16x2xf32> {
%pack = linalg.pack %src inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %dest : tensor<?x?xf32> -> tensor<?x?x16x2xf32>
return %pack : tensor<?x?x16x2xf32>
}
@@ -1418,64 +1461,6 @@ module attributes {transform.with_named_sequence} {
}
}
-// -----
-
-// CHECK-LABEL: func @test_vectorize_pack_no_vector_sizes
-// CHECK-SAME: %[[SRC:.*]]: tensor<64x4xf32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<2x4x16x2xf32>
-func.func @test_vectorize_pack_no_vector_sizes(%src: tensor<64x4xf32>, %dest: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> {
- %pack = linalg.pack %src outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %dest : tensor<64x4xf32> -> tensor<2x4x16x2xf32>
- return %pack : tensor<2x4x16x2xf32>
-}
-// CHECK-DAG: %[[CST:.*]] = ub.poison : f32
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CST]]
-// CHECK-SAME: {in_bounds = [true, true]} : tensor<64x4xf32>, vector<64x4xf32>
-// CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<64x4xf32> to vector<4x16x2x2xf32>
-// CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [2, 0, 1, 3] : vector<4x16x2x2xf32> to vector<2x4x16x2xf32>
-// CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
-// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
-// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<2x4x16x2xf32>, tensor<2x4x16x2xf32>
-// CHECK: return %[[WRITE]] : tensor<2x4x16x2xf32>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 : !transform.any_op
- transform.yield
- }
-}
-
-// -----
-
-// CHECK-LABEL: test_vectorize_padded_pack_no_vector_sizes
-// CHECK-SAME: %[[SRC:.*]]: tensor<32x7x15xf32>,
-// CHECK-SAME: %[[DEST:.*]]: tensor<32x4x1x16x2xf32>
-func.func @test_vectorize_padded_pack_no_vector_sizes(%src: tensor<32x7x15xf32>, %dest: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
- %pad = arith.constant 0.000000e+00 : f32
- %pack = linalg.pack %src padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %dest : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
- return %pack : tensor<32x4x1x16x2xf32>
-}
-// CHECK-DAG: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]], %[[C0]]], %[[CST]]
-// CHECK-SAME: {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
-// CHECK: %[[SC:.*]] = vector.shape_cast %[[READ]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
-// CHECK: %[[TR:.*]] = vector.transpose %[[SC]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
-// CHECK-DAG: %[[C0_1:.*]] = arith.constant 0 : index
-// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[TR]], %[[DEST]][%[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]], %[[C0_1]]]
-// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
-// CHECK: return %[[WRITE]] : tensor<32x4x1x16x2xf32>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 : !transform.any_op
- transform.yield
- }
-}
-
-
///----------------------------------------------------------------------------------------
/// Tests for other Ops
///----------------------------------------------------------------------------------------
diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir
index 16b7a5c..7160b52 100644
--- a/mlir/test/Dialect/MemRef/canonicalize.mlir
+++ b/mlir/test/Dialect/MemRef/canonicalize.mlir
@@ -911,6 +911,21 @@ func.func @reinterpret_noop(%arg : memref<2x3x4xf32>) -> memref<2x3x4xf32> {
// -----
+// CHECK-LABEL: func @reinterpret_constant_fold
+// CHECK-SAME: (%[[ARG:.*]]: memref<f32>)
+// CHECK: %[[RES:.*]] = memref.reinterpret_cast %[[ARG]] to offset: [0], sizes: [100, 100], strides: [100, 1]
+// CHECK: %[[CAST:.*]] = memref.cast %[[RES]]
+// CHECK: return %[[CAST]]
+func.func @reinterpret_constant_fold(%arg0: memref<f32>) -> memref<?x?xf32, strided<[?, ?], offset: ?>> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c100 = arith.constant 100 : index
+ %reinterpret_cast = memref.reinterpret_cast %arg0 to offset: [%c0], sizes: [%c100, %c100], strides: [%c100, %c1] : memref<f32> to memref<?x?xf32, strided<[?, ?], offset: ?>>
+ return %reinterpret_cast : memref<?x?xf32, strided<[?, ?], offset: ?>>
+}
+
+// -----
+
// CHECK-LABEL: func @reinterpret_of_reinterpret
// CHECK-SAME: (%[[ARG:.*]]: memref<?xi8>, %[[SIZE1:.*]]: index, %[[SIZE2:.*]]: index)
// CHECK: %[[RES:.*]] = memref.reinterpret_cast %[[ARG]] to offset: [0], sizes: [%[[SIZE2]]], strides: [1]
@@ -996,10 +1011,9 @@ func.func @reinterpret_of_extract_strided_metadata_same_type(%arg0 : memref<?x?x
// when the strides don't match.
// CHECK-LABEL: func @reinterpret_of_extract_strided_metadata_w_different_stride
// CHECK-SAME: (%[[ARG:.*]]: memref<8x2xf32>)
-// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK: %[[RES:.*]] = memref.reinterpret_cast %[[ARG]] to offset: [%[[C0]]], sizes: [4, 2, 2], strides: [1, 1, %[[C1]]]
-// CHECK: return %[[RES]]
+// CHECK: %[[RES:.*]] = memref.reinterpret_cast %[[ARG]] to offset: [0], sizes: [4, 2, 2], strides: [1, 1, 1]
+// CHECK: %[[CAST:.*]] = memref.cast %[[RES]]
+// CHECK: return %[[CAST]]
func.func @reinterpret_of_extract_strided_metadata_w_different_stride(%arg0 : memref<8x2xf32>) -> memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> {
%base, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %arg0 : memref<8x2xf32> -> memref<f32>, index, index, index, index, index
%m2 = memref.reinterpret_cast %base to offset: [%offset], sizes: [4, 2, 2], strides: [1, 1, %strides#1] : memref<f32> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
@@ -1011,11 +1025,9 @@ func.func @reinterpret_of_extract_strided_metadata_w_different_stride(%arg0 : me
// when the offset doesn't match.
// CHECK-LABEL: func @reinterpret_of_extract_strided_metadata_w_different_offset
// CHECK-SAME: (%[[ARG:.*]]: memref<8x2xf32>)
-// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
-// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK: %[[RES:.*]] = memref.reinterpret_cast %[[ARG]] to offset: [1], sizes: [%[[C8]], %[[C2]]], strides: [%[[C2]], %[[C1]]]
-// CHECK: return %[[RES]]
+// CHECK: %[[RES:.*]] = memref.reinterpret_cast %[[ARG]] to offset: [1], sizes: [8, 2], strides: [2, 1]
+// CHECK: %[[CAST:.*]] = memref.cast %[[RES]]
+// CHECK: return %[[CAST]]
func.func @reinterpret_of_extract_strided_metadata_w_different_offset(%arg0 : memref<8x2xf32>) -> memref<?x?xf32, strided<[?, ?], offset: ?>> {
%base, %offset, %sizes:2, %strides:2 = memref.extract_strided_metadata %arg0 : memref<8x2xf32> -> memref<f32>, index, index, index, index, index
%m2 = memref.reinterpret_cast %base to offset: [1], sizes: [%sizes#0, %sizes#1], strides: [%strides#0, %strides#1] : memref<f32> to memref<?x?xf32, strided<[?, ?], offset: ?>>
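The common thread in these canonicalization updates: constant index operands of memref.reinterpret_cast are now folded into its static offset/sizes/strides, and since that changes the result type, a memref.cast back to the original dynamic type is inserted for users. A sketch of the rewrite, with result types shown schematically:

  // Before: layout held in SSA constants.
  %r = memref.reinterpret_cast %arg to offset: [%c0], sizes: [%c100, %c100], strides: [%c100, %c1]
      : memref<f32> to memref<?x?xf32, strided<[?, ?], offset: ?>>

  // After: constants folded to static positions, plus a cast restoring the dynamic type.
  %r = memref.reinterpret_cast %arg to offset: [0], sizes: [100, 100], strides: [100, 1]
      : memref<f32> to memref<100x100xf32>
  %c = memref.cast %r : memref<100x100xf32> to memref<?x?xf32, strided<[?, ?], offset: ?>>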
diff --git a/mlir/test/Dialect/OpenACC/pointer-like-interface-alloc.mlir b/mlir/test/Dialect/OpenACC/pointer-like-interface-alloc.mlir
index 603ace8..3d4bec7 100644
--- a/mlir/test/Dialect/OpenACC/pointer-like-interface-alloc.mlir
+++ b/mlir/test/Dialect/OpenACC/pointer-like-interface-alloc.mlir
@@ -3,7 +3,7 @@
func.func @test_static_memref_alloc() {
%0 = memref.alloca() {test.ptr} : memref<10x20xf32>
// CHECK: Successfully generated alloc for operation: %[[ORIG:.*]] = memref.alloca() {test.ptr} : memref<10x20xf32>
- // CHECK: Generated: %{{.*}} = memref.alloca() : memref<10x20xf32>
+ // CHECK: Generated: %{{.*}} = memref.alloca() {acc.var_name = #acc.var_name<"test_alloc">} : memref<10x20xf32>
return
}
@@ -19,6 +19,6 @@ func.func @test_dynamic_memref_alloc() {
// CHECK: Generated: %[[DIM0:.*]] = memref.dim %[[ORIG]], %[[C0]] : memref<?x?xf32>
// CHECK: Generated: %[[C1:.*]] = arith.constant 1 : index
// CHECK: Generated: %[[DIM1:.*]] = memref.dim %[[ORIG]], %[[C1]] : memref<?x?xf32>
- // CHECK: Generated: %{{.*}} = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+ // CHECK: Generated: %{{.*}} = memref.alloc(%[[DIM0]], %[[DIM1]]) {acc.var_name = #acc.var_name<"test_alloc">} : memref<?x?xf32>
return
}
diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir
new file mode 100644
index 0000000..8846c9e
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir
@@ -0,0 +1,102 @@
+// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=firstprivate})" | FileCheck %s
+
+// CHECK: acc.firstprivate.recipe @firstprivate_scalar : memref<f32> init {
+// CHECK: ^bb0(%{{.*}}: memref<f32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() {acc.var_name = #acc.var_name<"scalar">} : memref<f32>
+// CHECK: acc.yield %[[ALLOC]] : memref<f32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<f32>, %[[DST:.*]]: memref<f32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<f32> to memref<f32>
+// CHECK: acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar() {
+ %0 = memref.alloca() {test.var = "scalar"} : memref<f32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_static_2d : memref<10x20xf32> init {
+// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() {acc.var_name = #acc.var_name<"static_2d">} : memref<10x20xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<10x20xf32>, %[[DST:.*]]: memref<10x20xf32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x20xf32> to memref<10x20xf32>
+// CHECK: acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_static_2d() {
+ %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_dynamic_2d : memref<?x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>):
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32>
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {acc.var_name = #acc.var_name<"dynamic_2d">} : memref<?x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<?x?xf32>, %[[DST:.*]]: memref<?x?xf32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<?x?xf32> to memref<?x?xf32>
+// CHECK: acc.terminator
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_dynamic_2d(%arg0: index, %arg1: index) {
+ %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_mixed_dims : memref<10x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>):
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) {acc.var_name = #acc.var_name<"mixed_dims">} : memref<10x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<10x?xf32>, %[[DST:.*]]: memref<10x?xf32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x?xf32> to memref<10x?xf32>
+// CHECK: acc.terminator
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_mixed_dims(%arg0: index) {
+ %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_scalar_int : memref<i32> init {
+// CHECK: ^bb0(%{{.*}}: memref<i32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() {acc.var_name = #acc.var_name<"scalar_int">} : memref<i32>
+// CHECK: acc.yield %[[ALLOC]] : memref<i32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<i32>, %[[DST:.*]]: memref<i32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<i32> to memref<i32>
+// CHECK: acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar_int() {
+ %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32>
+ return
+}
+
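A firstprivate recipe differs from the private recipes in the next file only by its mandatory copy region (plus a destroy region when the init allocates on the heap). Once populated, a recipe is referenced from a compute construct; a hedged usage sketch, reusing the recipe name from the first test above:

  acc.parallel firstprivate(@firstprivate_scalar -> %0 : memref<f32>) {
    // ... body reads the private copy initialized from the original ...
    acc.yield
  }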
diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir
new file mode 100644
index 0000000..3d5a918
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir
@@ -0,0 +1,82 @@
+// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=private})" | FileCheck %s
+
+// CHECK: acc.private.recipe @private_scalar : memref<f32> init {
+// CHECK: ^bb0(%{{.*}}: memref<f32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() {acc.var_name = #acc.var_name<"scalar">} : memref<f32>
+// CHECK: acc.yield %[[ALLOC]] : memref<f32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar() {
+ %0 = memref.alloca() {test.var = "scalar"} : memref<f32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_static_2d : memref<10x20xf32> init {
+// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() {acc.var_name = #acc.var_name<"static_2d">} : memref<10x20xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_static_2d() {
+ %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_dynamic_2d : memref<?x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>):
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32>
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) {acc.var_name = #acc.var_name<"dynamic_2d">} : memref<?x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_dynamic_2d(%arg0: index, %arg1: index) {
+ %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_mixed_dims : memref<10x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>):
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) {acc.var_name = #acc.var_name<"mixed_dims">} : memref<10x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_mixed_dims(%arg0: index) {
+ %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_scalar_int : memref<i32> init {
+// CHECK: ^bb0(%{{.*}}: memref<i32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() {acc.var_name = #acc.var_name<"scalar_int">} : memref<i32>
+// CHECK: acc.yield %[[ALLOC]] : memref<i32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar_int() {
+ %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32>
+ return
+}
+
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
index a1067ec..af09dc8 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -8,11 +8,11 @@
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
-// CHECK-LABEL: func @scf_for_yield_only(
+// CHECK-LABEL: func private @scf_for_yield_only(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: ) -> memref<?xf32> {
-func.func @scf_for_yield_only(
+func.func private @scf_for_yield_only(
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%lb : index, %ub : index, %step : index)
@@ -85,11 +85,11 @@ func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true},
// -----
-// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
+// CHECK-LABEL: func private @scf_for_with_tensor.insert_slice
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
-func.func @scf_for_with_tensor.insert_slice(
+func.func private @scf_for_with_tensor.insert_slice(
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32> {bufferization.writable = false},
@@ -471,11 +471,11 @@ func.func @scf_while_iter_arg_result_mismatch(%arg0: tensor<5xi1>,
// -----
-// CHECK-LABEL: func.func @parallel_insert_slice_no_conflict(
+// CHECK-LABEL: func private @parallel_insert_slice_no_conflict(
// CHECK-SAME: %[[idx:.*]]: index, %[[idx2:.*]]: index,
// CHECK-SAME: %[[arg1:.*]]: memref<?xf32, strided{{.*}}>,
// CHECK-SAME: %[[arg2:.*]]: memref<?xf32, strided{{.*}}>
-func.func @parallel_insert_slice_no_conflict(
+func.func private @parallel_insert_slice_no_conflict(
%idx: index,
%idx2: index,
%arg1: tensor<?xf32> {bufferization.writable = true},
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
index 5f95da2..f66cf7a 100644
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -8,12 +8,12 @@
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
-// CHECK-LABEL: func @insert_slice_fun
+// CHECK-LABEL: func private @insert_slice_fun
// CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
-func.func @insert_slice_fun(
+func.func private @insert_slice_fun(
%A0 : tensor<?xf32> {bufferization.writable = false},
%A1 : tensor<?xf32> {bufferization.writable = true},
%t0 : tensor<4xf32> {bufferization.writable = false},
@@ -331,12 +331,12 @@ func.func @dim_not_reading(%t: tensor<?xf32>, %f: f32, %pos: index)
// -----
// CHECK: #[[$map:.*]] = affine_map<(d0) -> (d0 + 5)>
-// CHECK-LABEL: func.func @cast_retains_buffer_layout(
+// CHECK-LABEL: func.func private @cast_retains_buffer_layout(
// CHECK-SAME: %[[t:.*]]: memref<?xf32, #[[$map]]>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> {
// CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, #[[$map]]> to memref<10xf32, #[[$map]]>
// CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, #[[$map]]> to memref<?xf32, strided<[1], offset: 7>>
// CHECK: return %[[slice]]
-func.func @cast_retains_buffer_layout(
+func.func private @cast_retains_buffer_layout(
%t: tensor<?xf32>
{bufferization.buffer_layout = affine_map<(d0) -> (d0 + 5)>},
%sz: index)
@@ -353,12 +353,12 @@ func.func @cast_retains_buffer_layout(
// -----
-// CHECK-LABEL: func.func @cast_retains_buffer_layout_strided(
+// CHECK-LABEL: func private @cast_retains_buffer_layout_strided(
// CHECK-SAME: %[[t:.*]]: memref<?xf32, strided<[1], offset: 5>>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> {
// CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, strided<[1], offset: 5>> to memref<10xf32, strided<[1], offset: 5>>
// CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, strided<[1], offset: 5>> to memref<?xf32, strided<[1], offset: 7>>
// CHECK: return %[[slice]]
-func.func @cast_retains_buffer_layout_strided(
+func.func private @cast_retains_buffer_layout_strided(
%t: tensor<?xf32>
{bufferization.buffer_layout = strided<[1], offset: 5>},
%sz: index)
@@ -490,3 +490,32 @@ func.func @collapse_shape_regression(
tensor.collapse_shape %0[[0, 1]] : tensor<5x6xf32> into tensor<30xf32>
return
}
+
+// -----
+
+// CHECK-LABEL: func private @mult_return_callee(
+// CHECK-SAME: %[[T:.*]]: memref<?xf32, strided<[?], offset: ?>>, %[[COND:.*]]: i1,
+// CHECK-SAME: %[[A:.*]]: index, %[[B:.*]]: index) -> index {
+// CHECK: cf.cond_br %[[COND]], ^bb1, ^bb2
+// CHECK: ^bb1:
+// CHECK: return %[[A]] : index
+// CHECK: ^bb2:
+// CHECK: return %[[B]] : index
+func.func private @mult_return_callee(%t: tensor<?xf32>, %cond: i1, %a: index, %b: index) -> (tensor<10xf32>, index) {
+ %casted = tensor.cast %t : tensor<?xf32> to tensor<10xf32>
+ cf.cond_br %cond, ^a, ^b
+^a:
+ return %casted, %a : tensor<10xf32>, index
+^b:
+ return %casted, %b : tensor<10xf32>, index
+}
+
+// CHECK-LABEL: func @mult_return(
+// CHECK-SAME: %[[T:.*]]: memref<?xf32, strided<[?], offset: ?>>, %[[COND:.*]]: i1,
+// CHECK-SAME: %[[A:.*]]: index, %[[B:.*]]: index) -> (memref<?xf32, strided<[?], offset: ?>>, index) {
+func.func @mult_return(%t: tensor<?xf32>, %cond: i1, %a: index, %b: index) -> (tensor<10xf32>, index) {
+ // CHECK: %[[RET:.*]] = call @mult_return_callee(%[[T]], %[[COND]], %[[A]], %[[B]]) : (memref<?xf32, strided<[?], offset: ?>>, i1, index, index) -> index
+ // CHECK: return %[[T]], %[[RET]] : memref<?xf32, strided<[?], offset: ?>>, index
+ %t_res, %v = func.call @mult_return_callee(%t, %cond, %a, %b) : (tensor<?xf32>, i1, index, index) -> (tensor<10xf32>, index)
+ return %t_res, %v : tensor<10xf32>, index
+}
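Note the bufferized callee signature in the CHECK lines above: the tensor<10xf32> result is dropped because it aliases (is equivalent to) the input buffer, so the callee returns only the index and the caller simply forwards its own %[[T]] buffer.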
diff --git a/mlir/test/Dialect/Tensor/tiling.mlir b/mlir/test/Dialect/Tensor/tiling.mlir
index 04a99b5..32fb0c9 100644
--- a/mlir/test/Dialect/Tensor/tiling.mlir
+++ b/mlir/test/Dialect/Tensor/tiling.mlir
@@ -149,7 +149,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
%copy = transform.structured.match ops{["linalg.copy"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %b, %c = transform.structured.fuse %copy [2, 3]
+ %a, %b, %c = transform.structured.fuse %copy tile_sizes [2, 3]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
diff --git a/mlir/test/Dialect/Tosa/tosa-attach-target.mlir b/mlir/test/Dialect/Tosa/tosa-attach-target.mlir
index d6c886c..a0c59c0 100644
--- a/mlir/test/Dialect/Tosa/tosa-attach-target.mlir
+++ b/mlir/test/Dialect/Tosa/tosa-attach-target.mlir
@@ -1,12 +1,14 @@
// RUN: mlir-opt %s -split-input-file -tosa-attach-target="profiles=pro_int,pro_fp extensions=int16,int4,bf16,fp8e4m3,fp8e5m2,fft,variable,controlflow,doubleround,inexactround,dynamic level=none" | FileCheck %s --check-prefix=CHECK-ALL
// RUN: mlir-opt %s -split-input-file -tosa-attach-target="level=8k" | FileCheck %s --check-prefix=CHECK-LVL-8K
// RUN: mlir-opt %s -split-input-file -tosa-attach-target | FileCheck %s --check-prefix=CHECK-DEFAULT
+// RUN: mlir-opt %s -split-input-file -tosa-attach-target="specification_version=1.1.draft" | FileCheck %s --check-prefix=CHECK-VERSION-1P1
// -----
-// CHECK-ALL: module attributes {tosa.target_env = #tosa.target_env<level = none, profiles = [pro_int, pro_fp], extensions = [int16, int4, bf16, fp8e4m3, fp8e5m2, fft, variable, controlflow, doubleround, inexactround, dynamic]>}
-// CHECK-LVL-8K: module attributes {tosa.target_env = #tosa.target_env<level = "8k", profiles = [], extensions = []>}
-// CHECK-DEFAULT: module attributes {tosa.target_env = #tosa.target_env<level = "8k", profiles = [], extensions = []>}
+// CHECK-ALL: module attributes {tosa.target_env = #tosa.target_env<specification_version = "1.0", level = none, profiles = [pro_int, pro_fp], extensions = [int16, int4, bf16, fp8e4m3, fp8e5m2, fft, variable, controlflow, doubleround, inexactround, dynamic]>}
+// CHECK-LVL-8K: module attributes {tosa.target_env = #tosa.target_env<specification_version = "1.0", level = "8k", profiles = [], extensions = []>}
+// CHECK-DEFAULT: module attributes {tosa.target_env = #tosa.target_env<specification_version = "1.0", level = "8k", profiles = [], extensions = []>}
+// CHECK-VERSION-1P1: module attributes {tosa.target_env = #tosa.target_env<specification_version = "1.1.draft", level = "8k", profiles = [], extensions = []>}
// CHECK-LABEL: test_simple
func.func @test_simple(%arg0 : tensor<1x1x1x1xf32>, %arg1 : tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32> {
%1 = tosa.add %arg0, %arg1 : (tensor<1x1x1x1xf32>, tensor<1x1x1x1xf32>) -> tensor<1x1x1x1xf32>
diff --git a/mlir/test/Dialect/Tosa/tosa-validation-version-1p0-invalid.mlir b/mlir/test/Dialect/Tosa/tosa-validation-version-1p0-invalid.mlir
new file mode 100644
index 0000000..51089df
--- /dev/null
+++ b/mlir/test/Dialect/Tosa/tosa-validation-version-1p0-invalid.mlir
@@ -0,0 +1,21 @@
+// RUN: mlir-opt %s -split-input-file -verify-diagnostics -tosa-attach-target="specification_version=1.0 profiles=pro_int,pro_fp extensions=int16,int4,bf16,fp8e4m3,fp8e5m2,fft,variable,controlflow,dynamic,doubleround,inexactround" -tosa-validate="strict-op-spec-alignment"
+
+// -----
+
+func.func @test_matmul_fp8_mixed_precision_operands(%arg0: tensor<1x14x19xf8E4M3FN>, %arg1: tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16> {
+ %azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN>
+ %bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E5M2>}> : () -> tensor<1xf8E5M2>
+ // expected-error@+1 {{'tosa.matmul' op illegal: the target specification version (1.0) is not backwards compatible with the op compliance specification version (1.1)}}
+ %0 = tosa.matmul %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf8E4M3FN>, tensor<1x19x28xf8E5M2>, tensor<1xf8E4M3FN>, tensor<1xf8E5M2>) -> tensor<1x14x28xf16>
+ return %0 : tensor<1x14x28xf16>
+}
+
+// -----
+
+func.func @test_matmul_fp8_input_fp32_acc_type(%arg0: tensor<1x14x19xf8E4M3FN>, %arg1: tensor<1x19x28xf8E4M3FN>) -> tensor<1x14x28xf32> {
+ %azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN>
+ %bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN>
+ // expected-error@+1 {{'tosa.matmul' op illegal: the target specification version (1.0) is not backwards compatible with the op compliance specification version (1.1)}}
+ %0 = tosa.matmul %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf8E4M3FN>, tensor<1x19x28xf8E4M3FN>, tensor<1xf8E4M3FN>, tensor<1xf8E4M3FN>) -> tensor<1x14x28xf32>
+ return %0 : tensor<1x14x28xf32>
+}
diff --git a/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir b/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir
new file mode 100644
index 0000000..8164509
--- /dev/null
+++ b/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir
@@ -0,0 +1,20 @@
+// RUN: mlir-opt %s -split-input-file -verify-diagnostics -tosa-attach-target="specification_version=1.1.draft profiles=pro_int,pro_fp extensions=int16,int4,bf16,fp8e4m3,fp8e5m2,fft,variable,controlflow,doubleround,inexactround" -tosa-validate="strict-op-spec-alignment" | FileCheck %s
+
+// -----
+
+func.func @test_matmul_fp8_mixed_precision_operands(%arg0: tensor<1x14x19xf8E4M3FN>, %arg1: tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16> {
+ %azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN>
+ %bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E5M2>}> : () -> tensor<1xf8E5M2>
+ %0 = tosa.matmul %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf8E4M3FN>, tensor<1x19x28xf8E5M2>, tensor<1xf8E4M3FN>, tensor<1xf8E5M2>) -> tensor<1x14x28xf16>
+ return %0 : tensor<1x14x28xf16>
+}
+
+// -----
+
+// CHECK-LABEL: test_matmul_fp8_input_fp32_acc_type
+func.func @test_matmul_fp8_input_fp32_acc_type(%arg0: tensor<1x14x19xf8E4M3FN>, %arg1: tensor<1x19x28xf8E4M3FN>) -> tensor<1x14x28xf32> {
+ %azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN>
+ %bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN>
+ %0 = tosa.matmul %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf8E4M3FN>, tensor<1x19x28xf8E4M3FN>, tensor<1xf8E4M3FN>, tensor<1xf8E4M3FN>) -> tensor<1x14x28xf32>
+ return %0 : tensor<1x14x28xf32>
+}
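The two new TOSA files above run the same fp8 tosa.matmul cases against different target versions: a 1.0 target makes strict validation reject ops whose compliance entry is 1.1, while a 1.1.draft target accepts them. The check is driven by the attribute that -tosa-attach-target installs, e.g. (values illustrative, following the format in tosa-attach-target.mlir):

  module attributes {tosa.target_env = #tosa.target_env<specification_version = "1.1.draft", level = "8k", profiles = [pro_int, pro_fp], extensions = [fp8e4m3, fp8e5m2]>} {
  }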
diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir
new file mode 100644
index 0000000..023a0e5
--- /dev/null
+++ b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir
@@ -0,0 +1,311 @@
+// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file | FileCheck %s
+
+///===----------------------------------------------===//
+/// Tests of `StepCompareFolder`
+///===----------------------------------------------===//
+
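+// A worked example of the fold condition (reasoning sketch, not a test):
+// `vector.step : vector<3xindex>` materializes [0, 1, 2], so comparing it with
+// a constant splat `c` can only fold when the predicate has the same truth
+// value for every element. E.g. for `ugt` with the splat on the LHS,
+// c > [0, 1, 2] is uniformly true iff c >= 3 and uniformly false iff c == 0
+// (unsigned); any other `c` yields a mixed mask and is left unfolded.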
+
+///===------------------------------------===//
+/// Tests of `ugt` (unsigned greater than)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ugt_constant_3_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_3_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 3 > [0, 1, 2] => [true, true, true] => true for all indices => fold
+ %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ugt_constant_2_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ugt_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 2 > [0, 1, 2] => [true, true, false] => not same for all indices => don't fold
+ %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_3_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_3_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] > 3 => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_max_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_max_rhs() -> vector<3xi1> {
+  // The largest signed i64 value:
+ %cst = arith.constant dense<0x7fffffffffffffff> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
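+  // [0, 1, 2] > max => [false, false, false] => false for all indices => fold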
+  %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_2_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] > 2 => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ugt_constant_1_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ugt_constant_1_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] > 1 => [false, false, true] => not same for all indices => don't fold
+  %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `uge` (unsigned greater than or equal)
+///===------------------------------------===//
+
+
+// CHECK-LABEL: @uge_constant_2_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @uge_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 2 >= [0, 1, 2] => [true, true, true] => true for all indices => fold
+ %1 = arith.cmpi uge, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_uge_constant_1_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_uge_constant_1_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+  // 1 >= [0, 1, 2] => [true, true, false] => not same for all indices => don't fold
+ %1 = arith.cmpi uge, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @uge_constant_3_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @uge_constant_3_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] >= 3 => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi uge, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_uge_constant_2_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_uge_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] >= 2 => [false, false, true] => not same for all indices => don't fold
+ %1 = arith.cmpi uge, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+
+///===------------------------------------===//
+/// Tests of `ult` (unsigned less than)
+///===------------------------------------===//
+
+
+// CHECK-LABEL: @ult_constant_2_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ult_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 2 < [0, 1, 2] => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi ult, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ult_constant_1_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ult_constant_1_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 1 < [0, 1, 2] => [false, false, true] => not same for all indices => don't fold
+ %1 = arith.cmpi ult, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ult_constant_3_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ult_constant_3_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] < 3 => [true, true, true] => true for all indices => fold
+ %1 = arith.cmpi ult, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ult_constant_2_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ult_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] < 2 => [true, true, false] => not same for all indices => don't fold
+ %1 = arith.cmpi ult, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `ule` (unsigned less than or equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ule_constant_3_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ule_constant_3_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
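+  // 3 <= [0, 1, 2] => [false, false, false] => false for all indices => fold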
+ %1 = arith.cmpi ule, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ule_constant_2_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ule_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
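+  // 2 <= [0, 1, 2] => [false, false, true] => not same for all indices => don't fold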
+ %1 = arith.cmpi ule, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ule_constant_2_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ule_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
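+  // [0, 1, 2] <= 2 => [true, true, true] => true for all indices => fold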
+ %1 = arith.cmpi ule, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ule_constant_1_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ule_constant_1_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
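+  // [0, 1, 2] <= 1 => [true, true, false] => not same for all indices => don't fold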
+  %1 = arith.cmpi ule, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `eq` (equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @eq_constant_3
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @eq_constant_3() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
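+  // [0, 1, 2] == 3 => [false, false, false] => false for all indices => fold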
+  %1 = arith.cmpi eq, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_eq_constant_2
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_eq_constant_2() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
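+  // [0, 1, 2] == 2 => [false, false, true] => not same for all indices => don't fold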
+  %1 = arith.cmpi eq, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `ne` (not equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ne_constant_3
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ne_constant_3() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
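+  // [0, 1, 2] != 3 => [true, true, true] => true for all indices => fold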
+  %1 = arith.cmpi ne, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ne_constant_2
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ne_constant_2() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
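+  // [0, 1, 2] != 2 => [true, true, false] => not same for all indices => don't fold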
+  %1 = arith.cmpi ne, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
diff --git a/mlir/test/Dialect/Vector/vector-unroll-options.mlir b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
index 35db14e..e5a98b5 100644
--- a/mlir/test/Dialect/Vector/vector-unroll-options.mlir
+++ b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
@@ -188,15 +188,38 @@ func.func @vector_fma(%a: vector<4x4xf32>, %b: vector<4x4xf32>, %c: vector<4x4xf
// CHECK-LABEL: func @vector_fma
// CHECK-COUNT-4: vector.fma %{{.+}}, %{{.+}}, %{{.+}} : vector<2x2xf32>
-// TODO: We should be able to unroll this like the example above - this will require extending UnrollElementwisePattern.
-func.func @negative_vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{
+func.func @vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32> {
%0 = vector.fma %a, %a, %a : vector<3x2x2xf32>
return %0 : vector<3x2x2xf32>
}
-// CHECK-LABEL: func @negative_vector_fma_3d
-// CHECK-NOT: vector.extract_strided_slice
-// CHECK: %[[R0:.*]] = vector.fma %{{.+}} : vector<3x2x2xf32>
-// CHECK: return
+// CHECK-LABEL: func @vector_fma_3d
+// CHECK-SAME: (%[[SRC:.*]]: vector<3x2x2xf32>) -> vector<3x2x2xf32> {
+// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<3x2x2xf32>
+// CHECK: %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_OUT_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_OUT_0:.*]] = vector.shape_cast %[[E_OUT_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[FMA0:.*]] = vector.fma %[[S_LHS_0]], %[[S_RHS_0]], %[[S_OUT_0]] : vector<2x2xf32>
+// CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[FMA0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+// CHECK: %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_OUT_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_OUT_1:.*]] = vector.shape_cast %[[E_OUT_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[FMA1:.*]] = vector.fma %[[S_LHS_1]], %[[S_RHS_1]], %[[S_OUT_1]] : vector<2x2xf32>
+// CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[FMA1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+// CHECK: %[[E_LHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_2:.*]] = vector.shape_cast %[[E_LHS_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_2:.*]] = vector.shape_cast %[[E_RHS_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_OUT_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_OUT_2:.*]] = vector.shape_cast %[[E_OUT_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[FMA2:.*]] = vector.fma %[[S_LHS_2]], %[[S_RHS_2]], %[[S_OUT_2]] : vector<2x2xf32>
+// CHECK: %[[I2:.*]] = vector.insert_strided_slice %[[FMA2]], %[[I1]] {offsets = [2, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+// CHECK: return %[[I2]] : vector<3x2x2xf32>
func.func @vector_multi_reduction(%v : vector<4x6xf32>, %acc: vector<4xf32>) -> vector<4xf32> {
%0 = vector.multi_reduction #vector.kind<add>, %v, %acc [1] : vector<4x6xf32> to vector<4xf32>
@@ -440,3 +463,36 @@ func.func @vector_step() -> vector<32xindex> {
// CHECK: %[[ADD3:.*]] = arith.addi %[[STEP]], %[[CST]] : vector<8xindex>
// CHECK: %[[INS3:.*]] = vector.insert_strided_slice %[[ADD3]], %[[INS2]] {offsets = [24], strides = [1]} : vector<8xindex> into vector<32xindex>
// CHECK: return %[[INS3]] : vector<32xindex>
+
+
+func.func @elementwise_3D_to_2D(%v1: vector<2x2x2xf32>, %v2: vector<2x2x2xf32>) -> vector<2x2x2xf32> {
+ %0 = arith.addf %v1, %v2 : vector<2x2x2xf32>
+ return %0 : vector<2x2x2xf32>
+}
+// CHECK-LABEL: func @elementwise_3D_to_2D
+// CHECK-SAME: (%[[ARG0:.*]]: vector<2x2x2xf32>, %[[ARG1:.*]]: vector<2x2x2xf32>) -> vector<2x2x2xf32> {
+// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<2x2x2xf32>
+// CHECK: %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[ADD0:.*]] = arith.addf %[[S_LHS_0]], %[[S_RHS_0]] : vector<2x2xf32>
+// CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[ADD0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32>
+// CHECK: %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[ADD1:.*]] = arith.addf %[[S_LHS_1]], %[[S_RHS_1]] : vector<2x2xf32>
+// CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[ADD1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32>
+// CHECK: return %[[I1]] : vector<2x2x2xf32>
+
+
+func.func @elementwise_4D_to_2D(%v1: vector<2x2x2x2xf32>, %v2: vector<2x2x2x2xf32>) -> vector<2x2x2x2xf32> {
+ %0 = arith.addf %v1, %v2 : vector<2x2x2x2xf32>
+ return %0 : vector<2x2x2x2xf32>
+}
+
+// CHECK-LABEL: func @elementwise_4D_to_2D
+// CHECK-COUNT-4: arith.addf %{{.*}}, %{{.*}} : vector<2x2xf32>
+// CHECK-NOT: arith.addf
+// CHECK: return
diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
index bb76392..0cf6dd1 100644
--- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
+++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
@@ -474,6 +474,41 @@ func.func @warp_scf_for_use_from_above(%arg0: index) {
}
// -----
+// CHECK-PROP-LABEL: func.func @warp_scf_for_local_loop_bounds
+// CHECK-PROP: (%{{.*}}: index, %[[ARG1:[a-zA-Z0-9]+]]: index) {
+// CHECK-PROP: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[32] args(%[[ARG1]] : index) -> (vector<4xf32>) {
+// CHECK-PROP: ^bb0(%{{.*}}: index):
+// CHECK-PROP: %[[T2:.*]] = "some_def"() : () -> vector<128xf32>
+// CHECK-PROP: gpu.yield %[[T2]] : vector<128xf32>
+// CHECK-PROP: }
+// CHECK-PROP: %[[FOR:.*]] = scf.for %{{.*}} to %[[ARG1]] step %{{.*}} iter_args(%{{.*}}) -> (vector<4xf32>) {
+// CHECK-PROP: %[[W2:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[32]
+// CHECK-PROP-SAME: args(%{{.*}} : vector<4xf32>) -> (vector<4xf32>) {
+// CHECK-PROP: ^bb0(%{{.*}}: vector<128xf32>):
+// CHECK-PROP: gpu.yield %{{.*}} : vector<128xf32>
+// CHECK-PROP: }
+// CHECK-PROP: scf.yield %[[W2]] : vector<4xf32>
+// CHECK-PROP: }
+// CHECK-PROP: "some_use"(%[[FOR]]) : (vector<4xf32>) -> ()
+// CHECK-PROP: return
+func.func @warp_scf_for_local_loop_bounds(%arg0: index, %bound: index) {
+ %c1 = arith.constant 1 : index
+ %c0 = arith.constant 0 : index
+ %0 = gpu.warp_execute_on_lane_0(%arg0)[32]
+ args(%bound : index) -> (vector<4xf32>) {
+ ^bb0(%arg1: index):
+ %ini = "some_def"() : () -> (vector<128xf32>)
+ %3 = scf.for %arg3 = %c0 to %arg1 step %c1 iter_args(%arg4 = %ini) -> (vector<128xf32>) {
+ %acc = "some_def"(%arg4) : (vector<128xf32>) -> (vector<128xf32>)
+ scf.yield %acc : vector<128xf32>
+ }
+ gpu.yield %3 : vector<128xf32>
+ }
+ "some_use"(%0) : (vector<4xf32>) -> ()
+ return
+}
+
+// -----
// CHECK-PROP-LABEL: func @warp_scf_for_swap(
// CHECK-PROP: %[[INI:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[32] -> (vector<4xf32>, vector<4xf32>) {
@@ -1925,3 +1960,22 @@ func.func @warp_scf_if_distribute(%pred : i1) {
// CHECK-PROP: "some_use"(%[[IF_YIELD_DIST]]) : (vector<1xf32>) -> ()
// CHECK-PROP: return
// CHECK-PROP: }
+
+// -----
+func.func @dedup_unused_result(%laneid : index) -> (vector<1xf32>) {
+ %r:3 = gpu.warp_execute_on_lane_0(%laneid)[32] ->
+ (vector<1xf32>, vector<2xf32>, vector<1xf32>) {
+ %2 = "some_def"() : () -> (vector<32xf32>)
+ %3 = "some_def"() : () -> (vector<64xf32>)
+ gpu.yield %2, %3, %2 : vector<32xf32>, vector<64xf32>, vector<32xf32>
+ }
+ %r0 = "some_use"(%r#2, %r#2) : (vector<1xf32>, vector<1xf32>) -> (vector<1xf32>)
+ return %r0 : vector<1xf32>
+}
+
+// CHECK-PROP: func @dedup_unused_result
+// CHECK-PROP: %[[R:.*]] = gpu.warp_execute_on_lane_0(%arg0)[32] -> (vector<1xf32>)
+// CHECK-PROP: %[[Y0:.*]] = "some_def"() : () -> vector<32xf32>
+// CHECK-PROP: %[[Y1:.*]] = "some_def"() : () -> vector<64xf32>
+// CHECK-PROP: gpu.yield %[[Y0]] : vector<32xf32>
+// CHECK-PROP: "some_use"(%[[R]], %[[R]]) : (vector<1xf32>, vector<1xf32>) -> vector<1xf32>
diff --git a/mlir/test/Dialect/WasmSSA/custom_parser/global.mlir b/mlir/test/Dialect/WasmSSA/custom_parser/global.mlir
index b9b3420..a25abbd 100644
--- a/mlir/test/Dialect/WasmSSA/custom_parser/global.mlir
+++ b/mlir/test/Dialect/WasmSSA/custom_parser/global.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s | FileCheck %s
module {
- wasmssa.import_global "from_js" from "env" as @global_0 nested : i32
+ wasmssa.import_global "from_js" from "env" as @global_0 : i32
wasmssa.global @global_1 i32 : {
%0 = wasmssa.const 10 : i32
@@ -21,7 +21,7 @@ module {
}
}
-// CHECK-LABEL: wasmssa.import_global "from_js" from "env" as @global_0 nested : i32
+// CHECK-LABEL: wasmssa.import_global "from_js" from "env" as @global_0 : i32
// CHECK-LABEL: wasmssa.global @global_1 i32 : {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
diff --git a/mlir/test/Dialect/WasmSSA/custom_parser/if.mlir b/mlir/test/Dialect/WasmSSA/custom_parser/if.mlir
index 01068cb..cee3c69 100644
--- a/mlir/test/Dialect/WasmSSA/custom_parser/if.mlir
+++ b/mlir/test/Dialect/WasmSSA/custom_parser/if.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-opt %s | FileCheck %s
-// CHECK-LABEL: wasmssa.func nested @func_0(
+// CHECK-LABEL: wasmssa.func @func_0(
// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>) -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
// CHECK: wasmssa.if %[[VAL_0]] : {
@@ -12,7 +12,7 @@
// CHECK: }> ^bb1
// CHECK: ^bb1(%[[VAL_3:.*]]: f32):
// CHECK: wasmssa.return %[[VAL_3]] : f32
-wasmssa.func nested @func_0(%arg0 : !wasmssa<local ref to i32>) -> i32 {
+wasmssa.func @func_0(%arg0 : !wasmssa<local ref to i32>) -> i32 {
%cond = wasmssa.local_get %arg0 : ref to i32
wasmssa.if %cond : {
%c0 = wasmssa.const 0.5 : f32
@@ -25,7 +25,7 @@ wasmssa.func nested @func_0(%arg0 : !wasmssa<local ref to i32>) -> i32 {
wasmssa.return %retVal : f32
}
-// CHECK-LABEL: wasmssa.func nested @func_1(
+// CHECK-LABEL: wasmssa.func @func_1(
// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>) -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
// CHECK: %[[VAL_1:.*]] = wasmssa.local of type i32
@@ -38,7 +38,7 @@ wasmssa.func nested @func_0(%arg0 : !wasmssa<local ref to i32>) -> i32 {
// CHECK: ^bb1:
// CHECK: %[[VAL_4:.*]] = wasmssa.local_get %[[VAL_1]] : ref to i32
// CHECK: wasmssa.return %[[VAL_4]] : i32
-wasmssa.func nested @func_1(%arg0 : !wasmssa<local ref to i32>) -> i32 {
+wasmssa.func @func_1(%arg0 : !wasmssa<local ref to i32>) -> i32 {
%cond = wasmssa.local_get %arg0 : ref to i32
%var = wasmssa.local of type i32
%zero = wasmssa.const 0
diff --git a/mlir/test/Dialect/WasmSSA/custom_parser/import.mlir b/mlir/test/Dialect/WasmSSA/custom_parser/import.mlir
index 3cc0548..dc23229 100644
--- a/mlir/test/Dialect/WasmSSA/custom_parser/import.mlir
+++ b/mlir/test/Dialect/WasmSSA/custom_parser/import.mlir
@@ -5,13 +5,13 @@ module {
wasmssa.import_func "bar" from "my_module" as @func_1 {sym_visibility = "nested", type = (i32) -> ()}
wasmssa.import_table "table" from "my_module" as @table_0 {sym_visibility = "nested", type = !wasmssa<tabletype !wasmssa.funcref [2:]>}
wasmssa.import_mem "mem" from "my_module" as @mem_0 {limits = !wasmssa<limit[2:]>, sym_visibility = "nested"}
- wasmssa.import_global "glob" from "my_module" as @global_0 nested : i32
- wasmssa.import_global "glob_mut" from "my_other_module" as @global_1 mutable nested : i32
+ wasmssa.import_global "glob" from "my_module" as @global_0 : i32
+ wasmssa.import_global "glob_mut" from "my_other_module" as @global_1 mutable : i32
}
// CHECK-LABEL: wasmssa.import_func "foo" from "my_module" as @func_0 {sym_visibility = "nested", type = (i32) -> ()}
// CHECK: wasmssa.import_func "bar" from "my_module" as @func_1 {sym_visibility = "nested", type = (i32) -> ()}
// CHECK: wasmssa.import_table "table" from "my_module" as @table_0 {sym_visibility = "nested", type = !wasmssa<tabletype !wasmssa.funcref [2:]>}
// CHECK: wasmssa.import_mem "mem" from "my_module" as @mem_0 {limits = !wasmssa<limit[2:]>, sym_visibility = "nested"}
-// CHECK: wasmssa.import_global "glob" from "my_module" as @global_0 nested : i32
-// CHECK: wasmssa.import_global "glob_mut" from "my_other_module" as @global_1 mutable nested : i32
+// CHECK: wasmssa.import_global "glob" from "my_module" as @global_0 : i32
+// CHECK: wasmssa.import_global "glob_mut" from "my_other_module" as @global_1 mutable : i32
diff --git a/mlir/test/Dialect/WasmSSA/custom_parser/local.mlir b/mlir/test/Dialect/WasmSSA/custom_parser/local.mlir
index 3f6423f..f613ebf 100644
--- a/mlir/test/Dialect/WasmSSA/custom_parser/local.mlir
+++ b/mlir/test/Dialect/WasmSSA/custom_parser/local.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s | FileCheck %s
module {
- wasmssa.func nested @func_0() -> f32 {
+ wasmssa.func @func_0() -> f32 {
%0 = wasmssa.local of type f32
%1 = wasmssa.local of type f32
%2 = wasmssa.const 8.000000e+00 : f32
@@ -9,7 +9,7 @@ module {
%4 = wasmssa.add %2 %3 : f32
wasmssa.return %4 : f32
}
- wasmssa.func nested @func_1() -> i32 {
+ wasmssa.func @func_1() -> i32 {
%0 = wasmssa.local of type i32
%1 = wasmssa.local of type i32
%2 = wasmssa.const 8 : i32
@@ -17,13 +17,13 @@ module {
%4 = wasmssa.add %2 %3 : i32
wasmssa.return %4 : i32
}
- wasmssa.func nested @func_2(%arg0: !wasmssa<local ref to i32>) -> i32 {
+ wasmssa.func @func_2(%arg0: !wasmssa<local ref to i32>) -> i32 {
%0 = wasmssa.const 3 : i32
wasmssa.return %0 : i32
}
}
-// CHECK-LABEL: wasmssa.func nested @func_0() -> f32 {
+// CHECK-LABEL: wasmssa.func @func_0() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.local of type f32
// CHECK: %[[VAL_1:.*]] = wasmssa.local of type f32
// CHECK: %[[VAL_2:.*]] = wasmssa.const 8.000000e+00 : f32
@@ -31,7 +31,7 @@ module {
// CHECK: %[[VAL_4:.*]] = wasmssa.add %[[VAL_2]] %[[VAL_3]] : f32
// CHECK: wasmssa.return %[[VAL_4]] : f32
-// CHECK-LABEL: wasmssa.func nested @func_1() -> i32 {
+// CHECK-LABEL: wasmssa.func @func_1() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.local of type i32
// CHECK: %[[VAL_1:.*]] = wasmssa.local of type i32
// CHECK: %[[VAL_2:.*]] = wasmssa.const 8 : i32
@@ -39,7 +39,7 @@ module {
// CHECK: %[[VAL_4:.*]] = wasmssa.add %[[VAL_2]] %[[VAL_3]] : i32
// CHECK: wasmssa.return %[[VAL_4]] : i32
-// CHECK-LABEL: wasmssa.func nested @func_2(
+// CHECK-LABEL: wasmssa.func @func_2(
// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>) -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 3 : i32
// CHECK: wasmssa.return %[[VAL_0]] : i32
diff --git a/mlir/test/Dialect/WasmSSA/custom_parser/memory.mlir b/mlir/test/Dialect/WasmSSA/custom_parser/memory.mlir
index 47551db..ca6ebe0 100644
--- a/mlir/test/Dialect/WasmSSA/custom_parser/memory.mlir
+++ b/mlir/test/Dialect/WasmSSA/custom_parser/memory.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s | FileCheck %s
-// CHECK: wasmssa.memory @mem0 public !wasmssa<limit[0: 65536]>
-wasmssa.memory @mem0 public !wasmssa<limit[0:65536]>
-
-// CHECK: wasmssa.memory @mem1 nested !wasmssa<limit[512:]>
+// CHECK: wasmssa.memory @mem1 !wasmssa<limit[512:]>
wasmssa.memory @mem1 !wasmssa<limit[512:]>
+
+// CHECK: wasmssa.memory exported @mem2 !wasmssa<limit[0: 65536]>
+wasmssa.memory exported @mem2 !wasmssa<limit[0:65536]>
diff --git a/mlir/test/Dialect/WasmSSA/custom_parser/table.mlir b/mlir/test/Dialect/WasmSSA/custom_parser/table.mlir
index 5a874f4..ea630de 100644
--- a/mlir/test/Dialect/WasmSSA/custom_parser/table.mlir
+++ b/mlir/test/Dialect/WasmSSA/custom_parser/table.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s | FileCheck %s
-// CHECK: wasmssa.table @tab0 public !wasmssa<tabletype !wasmssa.externref [0: 65536]>
-wasmssa.table @tab0 public !wasmssa<tabletype !wasmssa.externref [0:65536]>
+// CHECK: wasmssa.table exported @tab0 !wasmssa<tabletype !wasmssa.externref [0: 65536]>
+wasmssa.table exported @tab0 !wasmssa<tabletype !wasmssa.externref [0:65536]>
-// CHECK: wasmssa.table @tab1 nested !wasmssa<tabletype !wasmssa.funcref [348:]>
+// CHECK: wasmssa.table @tab1 !wasmssa<tabletype !wasmssa.funcref [348:]>
wasmssa.table @tab1 !wasmssa<tabletype !wasmssa.funcref [348:]>
diff --git a/mlir/test/Dialect/WasmSSA/extend-invalid.mlir b/mlir/test/Dialect/WasmSSA/extend-invalid.mlir
index 8d78280..7687e5f 100644
--- a/mlir/test/Dialect/WasmSSA/extend-invalid.mlir
+++ b/mlir/test/Dialect/WasmSSA/extend-invalid.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s -split-input-file -verify-diagnostics
-wasmssa.func nested @extend_low_64() -> i32 {
+wasmssa.func @extend_low_64() -> i32 {
%0 = wasmssa.const 10 : i32
// expected-error@+1 {{extend op can only take 8, 16 or 32 bits. Got 64}}
%1 = wasmssa.extend 64 low bits from %0: i32
@@ -10,7 +10,7 @@ wasmssa.func nested @extend_low_64() -> i32 {
// -----
-wasmssa.func nested @extend_too_much() -> i32 {
+wasmssa.func @extend_too_much() -> i32 {
%0 = wasmssa.const 10 : i32
// expected-error@+1 {{trying to extend the 32 low bits from a 'i32' value is illegal}}
%1 = wasmssa.extend 32 low bits from %0: i32
diff --git a/mlir/test/Dialect/WasmSSA/global-invalid.mlir b/mlir/test/Dialect/WasmSSA/global-invalid.mlir
index b9cafd8..c5bc606 100644
--- a/mlir/test/Dialect/WasmSSA/global-invalid.mlir
+++ b/mlir/test/Dialect/WasmSSA/global-invalid.mlir
@@ -13,7 +13,7 @@ module {
// -----
module {
- wasmssa.import_global "glob" from "my_module" as @global_0 mutable nested : i32
+ wasmssa.import_global "glob" from "my_module" as @global_0 mutable : i32
wasmssa.global @global_1 i32 : {
// expected-error@+1 {{global.get op is considered constant if it's referring to a import.global symbol marked non-mutable}}
%0 = wasmssa.global_get @global_0 : i32
@@ -30,3 +30,13 @@ module {
wasmssa.return %0 : i32
}
}
+
+// -----
+
+module {
+ // expected-error@+1 {{expecting either `exported` or symbol name. got exproted}}
+ wasmssa.global exproted @global_1 i32 : {
+ %0 = wasmssa.const 17 : i32
+ wasmssa.return %0 : i32
+ }
+}
diff --git a/mlir/test/Dialect/WasmSSA/locals-invalid.mlir b/mlir/test/Dialect/WasmSSA/locals-invalid.mlir
index 35c590b..eaad80e 100644
--- a/mlir/test/Dialect/WasmSSA/locals-invalid.mlir
+++ b/mlir/test/Dialect/WasmSSA/locals-invalid.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-opt %s -split-input-file -verify-diagnostics
-wasmssa.func nested @local_set_err(%arg0: !wasmssa<local ref to i32>) -> i64 {
+wasmssa.func @local_set_err(%arg0: !wasmssa<local ref to i32>) -> i64 {
%0 = wasmssa.const 3 : i64
// expected-error@+1 {{input type and result type of local.set do not match}}
wasmssa.local_set %arg0 : ref to i32 to %0 : i64
@@ -9,7 +9,7 @@ wasmssa.func nested @local_set_err(%arg0: !wasmssa<local ref to i32>) -> i64 {
// -----
-wasmssa.func nested @local_tee_err(%arg0: !wasmssa<local ref to i32>) -> i32 {
+wasmssa.func @local_tee_err(%arg0: !wasmssa<local ref to i32>) -> i32 {
%0 = wasmssa.const 3 : i64
// expected-error@+1 {{input type and output type of local.tee do not match}}
%1 = wasmssa.local_tee %arg0 : ref to i32 to %0 : i64
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index 228ef69d..ebbe3ce 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -858,7 +858,7 @@ func.func @load_mem_desc_mismatch_element_type(%arg0: !xegpu.mem_desc<16x64xf16>
// -----
func.func @load_mem_desc_invalid_result_size(%arg0: !xegpu.mem_desc<16x64xf16>) {
- // expected-error@+1 {{result shape must not exceed mem_desc shape}}
+ // expected-error@+1 {{data shape must not exceed mem_desc shape}}
%data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> vector<32x16xf16>
return
}
@@ -871,6 +871,14 @@ func.func @load_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>) {
}
// -----
+func.func @load_mem_desc_invalid_attr2(%arg0: !xegpu.mem_desc<16x64xf16>) {
+ // expected-error@+1 {{subgroup_block_io are only allowed when result is a 1D VectorType.}}
+ %data2 = xegpu.load_matrix %arg0[8, 8] <{subgroup_block_io}>: !xegpu.mem_desc<16x64xf16> -> vector<16x16xf16>
+ return
+}
+
+
+// -----
func.func @store_mem_desc_mismatch_element_type(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<16x16xf32>) {
// expected-error@+1 {{failed to verify that all of {mem_desc, data} have same element type}}
xegpu.store_matrix %arg1, %arg0[8, 8] : vector<16x16xf32>, !xegpu.mem_desc<16x64xf16>
@@ -892,30 +900,16 @@ func.func @store_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>, %arg1: ve
}
// -----
-func.func @mem_desc_subview_size_mismatch(%arg0: !xegpu.mem_desc<16x64xf16>) {
- // expected-error@+1 {{result shape must not exceed source shape}}
- %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<32x16xf16>
- return
-}
-
-// -----
-func.func @mem_desc_subview_layout_mismatch(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride =[1, 16]>>) {
- // expected-error@+1 {{result must inherit the source strides}}
- %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride =[1, 16]>> -> !xegpu.mem_desc<8x16xf16>
- return
-}
-
-// -----
-func.func @mem_desc_subview_element_type_mismatch(%arg0: !xegpu.mem_desc<16x64xf16>) {
- // expected-error@+1 {{failed to verify that all of {src, res} have same element type}}
- %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<8x16xf32, #xegpu.mem_layout<stride =[64, 1]>>
+func.func @store_mem_desc_invalid_attr2(%arg0: !xegpu.mem_desc<16x64xf16>, %data: vector<16x16xf16>) {
+ // expected-error@+1 {{subgroup_block_io are only allowed when result is a 1D VectorType.}}
+ xegpu.store_matrix %data, %arg0[8, 8] <{subgroup_block_io}>: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16>
return
}
// -----
-func.func @mem_desc_subview_rank_mismatch(%arg0: !xegpu.mem_desc<16x64xf16>) {
- // expected-error@+1 {{result rank must not exceed source rank}}
- %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<4x8x16xf16>
+func.func @store_mem_desc_invalid_attr2(%arg0: !xegpu.mem_desc<16x64xf16>, %data: vector<16x16xf16>) {
+ // expected-error@+1 {{subgroup_block_io are only allowed when result is a 1D VectorType.}}
+ xegpu.store_matrix %data, %arg0[8, 8] <{subgroup_block_io}>: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16>
return
}
diff --git a/mlir/test/Dialect/XeGPU/ops.mlir b/mlir/test/Dialect/XeGPU/ops.mlir
index bb37902..0a10f68 100644
--- a/mlir/test/Dialect/XeGPU/ops.mlir
+++ b/mlir/test/Dialect/XeGPU/ops.mlir
@@ -825,53 +825,73 @@ gpu.func @create_mem_desc_with_stride() {
gpu.return
}
-// CHECK: gpu.func @load_mem_desc([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>)
-gpu.func @load_mem_desc(%arg0: !xegpu.mem_desc<16x64xf16>) {
+// CHECK: gpu.func @load_matrix([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>)
+gpu.func @load_matrix(%arg0: !xegpu.mem_desc<16x64xf16>) {
// CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16> -> vector<8x16xf16>
%data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> vector<8x16xf16>
gpu.return
}
-// CHECK: gpu.func @load_mem_desc_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>)
-gpu.func @load_mem_desc_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>) {
+// CHECK: gpu.func @load_matrix_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>)
+gpu.func @load_matrix_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>) {
// CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>> -> vector<8x16xf16>
%data = xegpu.load_matrix %arg0[8, 8]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>> -> vector<8x16xf16>
gpu.return
}
+// CHECK: gpu.func @simt_load_matrix(%arg0: !xegpu.mem_desc<16x64xf16>)
+gpu.func @simt_load_matrix(%arg0: !xegpu.mem_desc<16x64xf16>) {
+ // CHECK: xegpu.load_matrix [[ARG0]][8, 16] : !xegpu.mem_desc<16x64xf16> -> vector<1xf16>
+ %data = xegpu.load_matrix %arg0[8, 16]: !xegpu.mem_desc<16x64xf16> -> vector<1xf16>
+ gpu.return
+}
+
+// CHECK: gpu.func @simt_load_matrix_subgroup_block_io(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<block = [16, 16]>>)
+gpu.func @simt_load_matrix_subgroup_block_io(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<block = [16, 16]>>) {
+ // CHECK: xegpu.load_matrix [[ARG0]][8, 16] <{subgroup_block_io}>: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<block = [16, 16]>> -> vector<8xf16>
+ %data = xegpu.load_matrix %arg0[8, 16] <{subgroup_block_io}>: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<block = [16, 16]>> -> vector<8xf16>
+ gpu.return
+}
+
+// CHECK: gpu.func @simt_load_matrix_vector(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>)
+gpu.func @simt_load_matrix_vector(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>) {
+ // CHECK: xegpu.load_matrix [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>> -> vector<8xf16>
+ %data = xegpu.load_matrix %arg0[8, 8] : !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>> -> vector<8xf16>
+ gpu.return
+}
-// CHECK: gpu.func @store_mem_desc([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>)
-gpu.func @store_mem_desc(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<16x16xf16>) {
+// CHECK: gpu.func @store_matrix([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>, [[ARG1:%.+]]: vector<16x16xf16>)
+gpu.func @store_matrix(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<16x16xf16>) {
// CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<16x16xf16>, !xegpu.mem_desc<16x64xf16>
xegpu.store_matrix %arg1, %arg0[8, 8]: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16>
gpu.return
}
-// CHECK: gpu.func @store_mem_desc_with_stride([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>)
-gpu.func @store_mem_desc_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>, %arg1: vector<16x16xf16>) {
+// CHECK: gpu.func @store_matrix_with_stride([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>, [[ARG1:%.+]]: vector<16x16xf16>)
+gpu.func @store_matrix_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>, %arg1: vector<16x16xf16>) {
// CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][0, 8] : vector<16x16xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>
xegpu.store_matrix %arg1, %arg0[0, 8]: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>
gpu.return
}
-// CHECK: gpu.func @mem_desc_subview([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>)
-gpu.func @mem_desc_subview(%arg0: !xegpu.mem_desc<16x64xf16>) {
- //CHECK: xegpu.mem_desc_subview [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout<stride = [64, 1]>>
- %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout<stride = [64, 1]>>
+// CHECK: gpu.func @simt_store_matrix(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<1xf16>) {
+gpu.func @simt_store_matrix(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<1xf16>) {
+ // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 16] : vector<1xf16>, !xegpu.mem_desc<16x64xf16>
+ xegpu.store_matrix %arg1, %arg0[8, 16]: vector<1xf16>, !xegpu.mem_desc<16x64xf16>
gpu.return
}
-// CHECK: gpu.func @mem_desc_subview_lower_rank([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16>)
-gpu.func @mem_desc_subview_lower_rank(%arg0: !xegpu.mem_desc<16x64xf16>) {
- //CHECK: xegpu.mem_desc_subview [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<16xf16, #xegpu.mem_layout<stride = [64, 1]>>
- %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16> -> !xegpu.mem_desc<16xf16, #xegpu.mem_layout<stride = [64, 1]>>
+// CHECK: gpu.func @simt_store_matrix_subgroup_block_io(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<block = [16, 16]>>, %arg1: vector<8xf16>)
+gpu.func @simt_store_matrix_subgroup_block_io(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<block = [16, 16]>>, %arg1: vector<8xf16>) {
+ // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 16] <{subgroup_block_io}>: vector<8xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<block = [16, 16]>>
+ xegpu.store_matrix %arg1, %arg0[8, 16] <{subgroup_block_io}>: vector<8xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<block = [16, 16]>>
gpu.return
}
-// CHECK: gpu.func @mem_desc_subview_with_stride([[ARG0:%.+]]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>)
-gpu.func @mem_desc_subview_with_stride(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>) {
- //CHECK: xegpu.mem_desc_subview [[ARG0]][8, 8] : !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout<stride = [1, 16]>>
- %data = xegpu.mem_desc_subview %arg0[8, 8]: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>> -> !xegpu.mem_desc<8x16xf16, #xegpu.mem_layout<stride = [1, 16]>>
+// CHECK: gpu.func @simt_store_matrix_vector(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>, %arg1: vector<8xf16>) {
+gpu.func @simt_store_matrix_vector(%arg0: !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>, %arg1: vector<8xf16>) {
+ // CHECK: xegpu.store_matrix [[ARG1]], [[ARG0]][8, 8] : vector<8xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>
+ xegpu.store_matrix %arg1, %arg0[8, 8] : vector<8xf16>, !xegpu.mem_desc<16x64xf16, #xegpu.mem_layout<stride = [1, 16]>>
gpu.return
}
diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
index 9d63c2d..fe4f44c 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
@@ -584,3 +584,101 @@ gpu.module @test_kernel {
gpu.return
}
}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: load_with_offsets
+ // CHECK-COUNT-2: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16xf32>
+ gpu.func @load_with_offsets(%src: ui64) -> vector<32xf32> {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+ %ld = xegpu.load %src[%cst], %mask {chunk_size = 1, layout_result_0 = #xegpu.layout<inst_data = [16]>, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32xf32>
+
+ gpu.return %ld : vector<32xf32>
+ }
+}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: store_with_offsets
+ // CHECK-COUNT-2: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16xf32>, ui64, vector<16xindex>, vector<16xi1>
+ gpu.func @store_with_offsets(%src: ui64) {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+
+ %st_vec = arith.constant dense<1023.0>: vector<32xf32>
+ xegpu.store %st_vec, %src[%cst], %mask {chunk_size = 1, layout_operand_0 = #xegpu.layout<inst_data = [16]>,
+ layout_operand_2 = #xegpu.layout<inst_data = [16]>,
+ layout_operand_3 = #xegpu.layout<inst_data = [16]>,
+ l1_hint = #xegpu.cache_hint<cached>} : vector<32xf32>, ui64, vector<32xindex>, vector<32xi1>
+
+ gpu.return
+ }
+}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: load_with_offsets_chunk
+ // CHECK: [[cst:%.+]] = arith.constant dense<0.000000e+00> : vector<32x4xf32>
+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
+ // CHECK-COUNT-4: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16x2xf32>
+ gpu.func @load_with_offsets_chunk(%src: ui64) -> vector<32x4xf32> {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+ %ld = xegpu.load %src[%cst], %mask {chunk_size = 4, layout_result_0 = #xegpu.layout<inst_data = [16, 2]>, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32x4xf32>
+ gpu.return %ld : vector<32x4xf32>
+ }
+}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: store_with_offsets_chunk
+  // CHECK: [[cst:%.+]] = arith.constant dense<1.023000e+03> : vector<16x2xf32>
+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
+ // CHECK-COUNT-4: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16x2xf32>, ui64, vector<16xindex>, vector<16xi1>
+ gpu.func @store_with_offsets_chunk(%src: ui64) {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+
+    %st_vec = arith.constant dense<1023.0>: vector<32x4xf32>
+ xegpu.store %st_vec, %src[%cst], %mask {chunk_size = 4, layout_operand_0 = #xegpu.layout<inst_data = [16, 2]>,
+ layout_operand_2 = #xegpu.layout<inst_data = [16, 2]>,
+ layout_operand_3 = #xegpu.layout<inst_data = [16, 2]>,
+ l1_hint = #xegpu.cache_hint<cached>} : vector<32x4xf32>, ui64, vector<32xindex>, vector<32xi1>
+ gpu.return
+ }
+}
diff --git a/mlir/test/Examples/standalone/test.wheel.toy b/mlir/test/Examples/standalone/test.wheel.toy
index 5ff9271..c8d188a 100644
--- a/mlir/test/Examples/standalone/test.wheel.toy
+++ b/mlir/test/Examples/standalone/test.wheel.toy
@@ -2,6 +2,7 @@
# than 255 chars when combined with the fact that pip wants to install into a tmp directory buried under
# C/Users/ContainerAdministrator/AppData/Local/Temp.
# UNSUPPORTED: target={{.*(windows).*}}
+# REQUIRES: expensive_checks
# REQUIRES: non-shared-libs-build
# REQUIRES: bindings-python
diff --git a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir
index cb16c37..b3154d4 100644
--- a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir
+++ b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir
@@ -3,7 +3,16 @@
// RUN: | FileCheck %s
// CHECK: data =
-// CHECK-RAW: [[[7.7, 0, 0], [7.7, 0, 0], [7.7, 0, 0]], [[0, 7.7, 0], [0, 7.7, 0], [0, 7.7, 0]], [[0, 0, 7.7], [0, 0, 7.7], [0, 0, 7.7]]]
+// CHECK{LITERAL}: [[[7.7, 0, 0],
+// CHECK{LITERAL}: [7.7, 0, 0],
+// CHECK{LITERAL}: [7.7, 0, 0]],
+// CHECK{LITERAL}: [[0, 7.7, 0],
+// CHECK{LITERAL}: [0, 7.7, 0],
+// CHECK{LITERAL}: [0, 7.7, 0]],
+// CHECK{LITERAL}: [[0, 0, 7.7],
+// CHECK{LITERAL}: [0, 0, 7.7],
+// CHECK{LITERAL}: [0, 0, 7.7]]]
+
module attributes {
gpu.container_module,
spirv.target_env = #spirv.target_env<
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
index 8a0390a..8116044 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -17,7 +17,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
%matmul = transform.structured.match ops{["linalg.matmul"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %b, %c = transform.structured.fuse %matmul [10, 20]
+ %a, %b, %c = transform.structured.fuse %matmul tile_sizes [10, 20]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -69,7 +69,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
%generic = transform.structured.match ops{["linalg.generic"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %b, %c = transform.structured.fuse %generic [10, 20]
+ %a, %b, %c = transform.structured.fuse %generic tile_sizes [10, 20]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -125,7 +125,7 @@ module attributes {transform.with_named_sequence} {
: (!transform.any_op) -> !transform.any_op
%mm1, %mm2 = transform.split_handle %matmuls
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
- %a, %b = transform.structured.fuse %mm2 [10]
+ %a, %b = transform.structured.fuse %mm2 tile_sizes [10]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
}
@@ -188,7 +188,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
%generic = transform.structured.match ops{["linalg.generic"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %b, %c = transform.structured.fuse %generic [10, 20]
+ %a, %b, %c = transform.structured.fuse %generic tile_sizes [10, 20]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -248,7 +248,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
%generic = transform.structured.match ops{["linalg.generic"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %b, %c = transform.structured.fuse %generic [10, 20] interchange[1, 0]
+ %a, %b, %c = transform.structured.fuse %generic tile_sizes [10, 20] interchange[1, 0]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -307,7 +307,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
%generic = transform.structured.match ops{["linalg.generic"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %b, %c = transform.structured.fuse %generic [10, 20]
+ %a, %b, %c = transform.structured.fuse %generic tile_sizes [10, 20]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -367,7 +367,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
%generic = transform.structured.match ops{["linalg.generic"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %b, %c = transform.structured.fuse %generic [10, 20]
+ %a, %b, %c = transform.structured.fuse %generic tile_sizes [10, 20]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
@@ -423,7 +423,7 @@ module attributes {transform.with_named_sequence} {
: (!transform.any_op) -> !transform.any_op
%mm1, %mm2, %mm3 = transform.split_handle %matmuls
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- %a, %b = transform.structured.fuse %mm3 [10]
+ %a, %b = transform.structured.fuse %mm3 tile_sizes [10]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
}
@@ -512,7 +512,7 @@ module attributes {transform.with_named_sequence} {
: (!transform.any_op) -> !transform.any_op
%generic1, %generic2, %generic3 = transform.split_handle %generics
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- %a, %b = transform.structured.fuse %generic3 [10]
+ %a, %b = transform.structured.fuse %generic3 tile_sizes [10]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
}
@@ -568,7 +568,7 @@ module attributes {transform.with_named_sequence} {
: (!transform.any_op) -> !transform.any_op
%pad = transform.structured.match ops{["tensor.pad"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %b = transform.structured.fuse %pad [8]
+ %a, %b = transform.structured.fuse %pad tile_sizes [8]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
}
@@ -614,7 +614,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
%matmul = transform.structured.match ops{["linalg.generic"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %b = transform.structured.fuse %matmul [0, 1, 0]
+ %a, %b = transform.structured.fuse %matmul tile_sizes [0, 1, 0]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.yield
}
@@ -652,7 +652,7 @@ module attributes {transform.with_named_sequence} {
transform.named_sequence @__transform_main(%arg1 : !transform.any_op {transform.readonly}) {
%generic = transform.structured.match ops{["linalg.generic"]} in %arg1
: (!transform.any_op) -> !transform.any_op
- %a, %loops:4 = transform.structured.fuse %generic {tile_sizes = [1, 16, 16, 16], tile_interchange = [0, 1, 2, 3], apply_cleanup = false}
+ %a, %loops:4 = transform.structured.fuse %generic tile_sizes [1, 16, 16, 16] interchange [0, 1, 2, 3]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
transform.yield
}
diff --git a/mlir/test/Pass/remark-final.mlir b/mlir/test/Pass/remark-final.mlir
new file mode 100644
index 0000000..325271e
--- /dev/null
+++ b/mlir/test/Pass/remark-final.mlir
@@ -0,0 +1,17 @@
+// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final 2>&1 | FileCheck %s
+// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final --remark-format=yaml --remarks-output-file=%t.yaml
+// RUN: FileCheck --check-prefix=CHECK-YAML %s < %t.yaml
+module @foo {
+ "test.op"() : () -> ()
+
+}
+
+// CHECK-YAML-NOT: This is a test passed remark (should be dropped)
+// CHECK-YAML-DAG: !Analysis
+// CHECK-YAML-DAG: !Failure
+// CHECK-YAML-DAG: !Passed
+
+// CHECK-NOT: This is a test passed remark (should be dropped)
+// CHECK-DAG: remark: [Analysis] test-remark
+// CHECK-DAG: remark: [Failure] test-remark | Category:category-2-failed
+// CHECK-DAG: remark: [Passed] test-remark | Category:category-1-passed
diff --git a/mlir/test/Target/Cpp/expressions.mlir b/mlir/test/Target/Cpp/expressions.mlir
index 4281f41..9f1c816 100644
--- a/mlir/test/Target/Cpp/expressions.mlir
+++ b/mlir/test/Target/Cpp/expressions.mlir
@@ -315,16 +315,13 @@ func.func @different_expressions(%arg0: i32, %arg1: i32, %arg2: i32, %arg3: i32)
}
// CPP-DEFAULT: int32_t expression_with_dereference(int32_t [[VAL_1:v[0-9]+]], int32_t* [[VAL_2]]) {
-// CPP-DEFAULT-NEXT: int32_t [[VAL_3:v[0-9]+]] = *([[VAL_2]] - [[VAL_1]]);
-// CPP-DEFAULT-NEXT: return [[VAL_3]];
+// CPP-DEFAULT-NEXT: return *([[VAL_2]] - [[VAL_1]]);
// CPP-DEFAULT-NEXT: }
// CPP-DECLTOP: int32_t expression_with_dereference(int32_t [[VAL_1:v[0-9]+]], int32_t* [[VAL_2]]) {
-// CPP-DECLTOP-NEXT: int32_t [[VAL_3:v[0-9]+]];
-// CPP-DECLTOP-NEXT: [[VAL_3]] = *([[VAL_2]] - [[VAL_1]]);
-// CPP-DECLTOP-NEXT: return [[VAL_3]];
+// CPP-DECLTOP-NEXT: return *([[VAL_2]] - [[VAL_1]]);
// CPP-DECLTOP-NEXT: }
-func.func @expression_with_dereference(%arg1: i32, %arg2: !emitc.ptr<i32>) -> i32 {
+emitc.func @expression_with_dereference(%arg1: i32, %arg2: !emitc.ptr<i32>) -> i32 {
%c = emitc.expression %arg1, %arg2 : (i32, !emitc.ptr<i32>) -> i32 {
%e = emitc.sub %arg2, %arg1 : (!emitc.ptr<i32>, i32) -> !emitc.ptr<i32>
%d = emitc.apply "*"(%e) : (!emitc.ptr<i32>) -> i32
@@ -384,19 +381,16 @@ func.func @expression_with_subscript_user(%arg0: !emitc.ptr<!emitc.opaque<"void"
// CPP-DEFAULT: bool expression_with_load(int32_t [[VAL_1:v.+]], int32_t [[VAL_2:v.+]], int32_t* [[VAL_3:v.+]]) {
// CPP-DEFAULT-NEXT: int64_t [[VAL_4:v.+]] = 0;
// CPP-DEFAULT-NEXT: int32_t [[VAL_5:v.+]] = 42;
-// CPP-DEFAULT-NEXT: bool [[VAL_6:v.+]] = [[VAL_5]] + [[VAL_2]] < [[VAL_3]][[[VAL_4]]] + [[VAL_1]];
-// CPP-DEFAULT-NEXT: return [[VAL_6]];
+// CPP-DEFAULT-NEXT: return [[VAL_5]] + [[VAL_2]] < [[VAL_3]][[[VAL_4]]] + [[VAL_1]];
// CPP-DECLTOP: bool expression_with_load(int32_t [[VAL_1:v.+]], int32_t [[VAL_2:v.+]], int32_t* [[VAL_3:v.+]]) {
// CPP-DECLTOP-NEXT: int64_t [[VAL_4:v.+]];
// CPP-DECLTOP-NEXT: int32_t [[VAL_5:v.+]];
-// CPP-DECLTOP-NEXT: bool [[VAL_6:v.+]];
// CPP-DECLTOP-NEXT: [[VAL_4]] = 0;
// CPP-DECLTOP-NEXT: [[VAL_5]] = 42;
-// CPP-DECLTOP-NEXT: [[VAL_6]] = [[VAL_5]] + [[VAL_2]] < [[VAL_3]][[[VAL_4]]] + [[VAL_1]];
-// CPP-DECLTOP-NEXT: return [[VAL_6]];
+// CPP-DECLTOP-NEXT: return [[VAL_5]] + [[VAL_2]] < [[VAL_3]][[[VAL_4]]] + [[VAL_1]];
-func.func @expression_with_load(%arg0: i32, %arg1: i32, %arg2: !emitc.ptr<i32>) -> i1 {
+emitc.func @expression_with_load(%arg0: i32, %arg1: i32, %arg2: !emitc.ptr<i32>) -> i1 {
%c0 = "emitc.constant"() {value = 0 : i64} : () -> i64
%0 = "emitc.variable"() <{value = #emitc.opaque<"42">}> : () -> !emitc.lvalue<i32>
%ptr = emitc.subscript %arg2[%c0] : (!emitc.ptr<i32>, i64) -> !emitc.lvalue<i32>
@@ -408,22 +402,19 @@ func.func @expression_with_load(%arg0: i32, %arg1: i32, %arg2: !emitc.ptr<i32>)
%e = emitc.cmp lt, %b, %d :(i32, i32) -> i1
yield %e : i1
}
- return %result : i1
+ emitc.return %result : i1
}
// CPP-DEFAULT: bool expression_with_load_and_call(int32_t* [[VAL_1:v.+]]) {
// CPP-DEFAULT-NEXT: int64_t [[VAL_2:v.+]] = 0;
-// CPP-DEFAULT-NEXT: bool [[VAL_3:v.+]] = [[VAL_1]][[[VAL_2]]] + bar([[VAL_1]][[[VAL_2]]]) < [[VAL_1]][[[VAL_2]]];
-// CPP-DEFAULT-NEXT: return [[VAL_3]];
+// CPP-DEFAULT-NEXT: return [[VAL_1]][[[VAL_2]]] + bar([[VAL_1]][[[VAL_2]]]) < [[VAL_1]][[[VAL_2]]];
// CPP-DECLTOP: bool expression_with_load_and_call(int32_t* [[VAL_1:v.+]]) {
// CPP-DECLTOP-NEXT: int64_t [[VAL_2:v.+]];
-// CPP-DECLTOP-NEXT: bool [[VAL_3:v.+]];
// CPP-DECLTOP-NEXT: [[VAL_2]] = 0;
-// CPP-DECLTOP-NEXT: [[VAL_3]] = [[VAL_1]][[[VAL_2]]] + bar([[VAL_1]][[[VAL_2]]]) < [[VAL_1]][[[VAL_2]]];
-// CPP-DECLTOP-NEXT: return [[VAL_3]];
+// CPP-DECLTOP-NEXT: return [[VAL_1]][[[VAL_2]]] + bar([[VAL_1]][[[VAL_2]]]) < [[VAL_1]][[[VAL_2]]];
-func.func @expression_with_load_and_call(%arg0: !emitc.ptr<i32>) -> i1 {
+emitc.func @expression_with_load_and_call(%arg0: !emitc.ptr<i32>) -> i1 {
%c0 = "emitc.constant"() {value = 0 : i64} : () -> i64
%ptr = emitc.subscript %arg0[%c0] : (!emitc.ptr<i32>, i64) -> !emitc.lvalue<i32>
%result = emitc.expression %ptr : (!emitc.lvalue<i32>) -> i1 {
@@ -435,7 +426,7 @@ func.func @expression_with_load_and_call(%arg0: !emitc.ptr<i32>) -> i1 {
%f = emitc.cmp lt, %e, %b :(i32, i32) -> i1
yield %f : i1
}
- return %result : i1
+ emitc.return %result : i1
}
@@ -458,3 +449,204 @@ emitc.func @expression_with_call_opaque_with_args_array(%0 : i32, %1 : i32) {
}
return
}
+
+// CPP-DEFAULT: void inline_side_effects_into_assign(int32_t [[VAL_1:v[0-9]+]], int32_t* [[VAL_2:v[0-9]+]]) {
+// CPP-DEFAULT-NEXT: int64_t [[VAL_3:v[0-9]+]] = 0;
+// CPP-DEFAULT-NEXT: int32_t [[VAL_4:v[0-9]+]] = 42;
+// CPP-DEFAULT-NEXT: [[VAL_4]] = [[VAL_4]] * [[VAL_1]] + [[VAL_2]][[[VAL_3]]];
+// CPP-DEFAULT-NEXT: return;
+// CPP-DEFAULT-NEXT: }
+
+// CPP-DECLTOP: void inline_side_effects_into_assign(int32_t [[VAL_1:v[0-9]+]], int32_t* [[VAL_2:v[0-9]+]]) {
+// CPP-DECLTOP-NEXT: int64_t [[VAL_3:v[0-9]+]];
+// CPP-DECLTOP-NEXT: int32_t [[VAL_4:v[0-9]+]];
+// CPP-DECLTOP-NEXT: [[VAL_3]] = 0;
+// CPP-DECLTOP-NEXT: [[VAL_4]] = 42;
+// CPP-DECLTOP-NEXT: [[VAL_4]] = [[VAL_4]] * [[VAL_1]] + [[VAL_2]][[[VAL_3]]];
+// CPP-DECLTOP-NEXT: return;
+// CPP-DECLTOP-NEXT: }
+
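+// Both loads immediately precede the assignment to the same variable, so the expression can be
+// inlined into the assign's RHS without reordering its side effects.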
+emitc.func @inline_side_effects_into_assign(%arg0: i32, %arg1: !emitc.ptr<i32>) {
+ %c0 = "emitc.constant"() {value = 0 : i64} : () -> i64
+ %0 = "emitc.variable"() <{value = #emitc.opaque<"42">}> : () -> !emitc.lvalue<i32>
+ %ptr = emitc.subscript %arg1[%c0] : (!emitc.ptr<i32>, i64) -> !emitc.lvalue<i32>
+ %result = emitc.expression %arg0, %0, %ptr : (i32, !emitc.lvalue<i32>, !emitc.lvalue<i32>) -> i32 {
+ %a = emitc.load %0 : !emitc.lvalue<i32>
+ %b = emitc.mul %a, %arg0 : (i32, i32) -> i32
+ %c = emitc.load %ptr : !emitc.lvalue<i32>
+ %d = emitc.add %b, %c : (i32, i32) -> i32
+ yield %d : i32
+ }
+ emitc.assign %result : i32 to %0 : !emitc.lvalue<i32>
+ emitc.return
+}
+
+// CPP-DEFAULT: void do_not_inline_side_effects_into_assign(int32_t [[VAL_1:v[0-9]+]], int32_t* [[VAL_2:v[0-9]+]]) {
+// CPP-DEFAULT-NEXT: int64_t [[VAL_3:v[0-9]+]] = 0;
+// CPP-DEFAULT-NEXT: int32_t [[VAL_4:v[0-9]+]] = 42;
+// CPP-DEFAULT-NEXT: int32_t [[VAL_5:v[0-9]+]] = [[VAL_4]] * [[VAL_1]];
+// CPP-DEFAULT-NEXT: [[VAL_2]][[[VAL_3]]] = [[VAL_5]];
+// CPP-DEFAULT-NEXT: return;
+// CPP-DEFAULT-NEXT: }
+
+// CPP-DECLTOP: void do_not_inline_side_effects_into_assign(int32_t [[VAL_1:v[0-9]+]], int32_t* [[VAL_2:v[0-9]+]]) {
+// CPP-DECLTOP-NEXT: int64_t [[VAL_3:v[0-9]+]];
+// CPP-DECLTOP-NEXT: int32_t [[VAL_4:v[0-9]+]];
+// CPP-DECLTOP-NEXT: int32_t [[VAL_5:v[0-9]+]];
+// CPP-DECLTOP-NEXT: [[VAL_3]] = 0;
+// CPP-DECLTOP-NEXT: [[VAL_4]] = 42;
+// CPP-DECLTOP-NEXT: [[VAL_5]] = [[VAL_4]] * [[VAL_1]];
+// CPP-DECLTOP-NEXT: [[VAL_2]][[[VAL_3]]] = [[VAL_5]];
+// CPP-DECLTOP-NEXT: return;
+// CPP-DECLTOP-NEXT: }
+
+emitc.func @do_not_inline_side_effects_into_assign(%arg0: i32, %arg1: !emitc.ptr<i32>) {
+ %c0 = "emitc.constant"() {value = 0 : i64} : () -> i64
+ %0 = "emitc.variable"() <{value = #emitc.opaque<"42">}> : () -> !emitc.lvalue<i32>
+ %ptr = emitc.subscript %arg1[%c0] : (!emitc.ptr<i32>, i64) -> !emitc.lvalue<i32>
+ %result = emitc.expression %arg0, %0 : (i32, !emitc.lvalue<i32>) -> i32 {
+ %a = emitc.load %0 : !emitc.lvalue<i32>
+ %b = emitc.mul %a, %arg0 : (i32, i32) -> i32
+ yield %b : i32
+ }
+ emitc.assign %result : i32 to %ptr : !emitc.lvalue<i32>
+ emitc.return
+}
+
+// CPP-DEFAULT: int32_t do_not_inline_non_preceding_side_effects(int32_t [[VAL_1:v[0-9]+]], int32_t* [[VAL_2:v[0-9]+]]) {
+// CPP-DEFAULT-NEXT: int64_t [[VAL_3:v[0-9]+]] = 0;
+// CPP-DEFAULT-NEXT: int32_t [[VAL_4:v[0-9]+]] = 42;
+// CPP-DEFAULT-NEXT: int32_t [[VAL_5:v[0-9]+]] = [[VAL_4]] * [[VAL_1]];
+// CPP-DEFAULT-NEXT: [[VAL_2]][[[VAL_3]]] = [[VAL_1]];
+// CPP-DEFAULT-NEXT: return [[VAL_5]];
+// CPP-DEFAULT-NEXT: }
+
+// CPP-DECLTOP: int32_t do_not_inline_non_preceding_side_effects(int32_t [[VAL_1:v[0-9]+]], int32_t* [[VAL_2:v[0-9]+]]) {
+// CPP-DECLTOP-NEXT: int64_t [[VAL_3:v[0-9]+]];
+// CPP-DECLTOP-NEXT: int32_t [[VAL_4:v[0-9]+]];
+// CPP-DECLTOP-NEXT: int32_t [[VAL_5:v[0-9]+]];
+// CPP-DECLTOP-NEXT: [[VAL_3]] = 0;
+// CPP-DECLTOP-NEXT: [[VAL_4]] = 42;
+// CPP-DECLTOP-NEXT: [[VAL_5]] = [[VAL_4]] * [[VAL_1]];
+// CPP-DECLTOP-NEXT: [[VAL_2]][[[VAL_3]]] = [[VAL_1]];
+// CPP-DECLTOP-NEXT: return [[VAL_5]];
+// CPP-DECLTOP-NEXT: }
+
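+// Here the store to the subscript lvalue happens between the expression and its use, so inlining
+// would move the expression's loads past that store; a temporary is expected instead.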
+emitc.func @do_not_inline_non_preceding_side_effects(%arg0: i32, %arg1: !emitc.ptr<i32>) -> i32 {
+ %c0 = "emitc.constant"() {value = 0 : i64} : () -> i64
+ %0 = "emitc.variable"() <{value = #emitc.opaque<"42">}> : () -> !emitc.lvalue<i32>
+ %ptr = emitc.subscript %arg1[%c0] : (!emitc.ptr<i32>, i64) -> !emitc.lvalue<i32>
+ %result = emitc.expression %arg0, %0 : (i32, !emitc.lvalue<i32>) -> i32 {
+ %a = emitc.load %0 : !emitc.lvalue<i32>
+ %b = emitc.mul %a, %arg0 : (i32, i32) -> i32
+ yield %b : i32
+ }
+ emitc.assign %arg0 : i32 to %ptr : !emitc.lvalue<i32>
+ emitc.return %result : i32
+}
+
+// CPP-DEFAULT: int32_t inline_side_effects_into_if(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) {
+// CPP-DEFAULT-NEXT: int32_t [[VAL_4:v[0-9]+]];
+// CPP-DEFAULT-NEXT: if (bar([[VAL_1]], [[VAL_2]]) < [[VAL_3]]) {
+// CPP-DEFAULT-NEXT: [[VAL_4]] = [[VAL_1]];
+// CPP-DEFAULT-NEXT: } else {
+// CPP-DEFAULT-NEXT: [[VAL_4]] = [[VAL_2]];
+// CPP-DEFAULT-NEXT: }
+// CPP-DEFAULT-NEXT: int32_t [[VAL_5:v[0-9]+]] = [[VAL_4]];
+// CPP-DEFAULT-NEXT: return [[VAL_5]];
+// CPP-DEFAULT-NEXT: }
+
+// CPP-DECLTOP: int32_t inline_side_effects_into_if(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) {
+// CPP-DECLTOP-NEXT: int32_t [[VAL_4:v[0-9]+]];
+// CPP-DECLTOP-NEXT: int32_t [[VAL_5:v[0-9]+]];
+// CPP-DECLTOP-NEXT: ;
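+// Note: the lone ';' above comes from the variable with an empty initializer; with declarations
+// hoisted to the top, only an empty statement remains at its original position.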
+// CPP-DECLTOP-NEXT: if (bar([[VAL_1]], [[VAL_2]]) < [[VAL_3]]) {
+// CPP-DECLTOP-NEXT: [[VAL_4]] = [[VAL_1]];
+// CPP-DECLTOP-NEXT: } else {
+// CPP-DECLTOP-NEXT: [[VAL_4]] = [[VAL_2]];
+// CPP-DECLTOP-NEXT: }
+// CPP-DECLTOP-NEXT: [[VAL_5]] = [[VAL_4]];
+// CPP-DECLTOP-NEXT: return [[VAL_5]];
+// CPP-DECLTOP-NEXT: }
+
+func.func @inline_side_effects_into_if(%arg0: i32, %arg1: i32, %arg2: i32) -> i32 {
+ %v = "emitc.variable"(){value = #emitc.opaque<"">} : () -> !emitc.lvalue<i32>
+ %cond = emitc.expression %arg0, %arg1, %arg2 : (i32, i32, i32) -> i1 {
+ %a = emitc.call_opaque "bar" (%arg0, %arg1) : (i32, i32) -> (i32)
+ %b = emitc.cmp lt, %a, %arg2 : (i32, i32) -> i1
+ emitc.yield %b : i1
+ }
+ emitc.if %cond {
+ emitc.assign %arg0 : i32 to %v : !emitc.lvalue<i32>
+ emitc.yield
+ } else {
+ emitc.assign %arg1 : i32 to %v : !emitc.lvalue<i32>
+ emitc.yield
+ }
+ %v_load = emitc.load %v : !emitc.lvalue<i32>
+ return %v_load : i32
+}
+
+// CPP-DEFAULT: void inline_side_effects_into_switch(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) {
+// CPP-DEFAULT-NEXT: switch (bar([[VAL_1]], [[VAL_2]]) + [[VAL_3]]) {
+// CPP-DEFAULT-NEXT: case 2: {
+// CPP-DEFAULT-NEXT: int32_t [[VAL_4:v[0-9]+]] = func_b();
+// CPP-DEFAULT-NEXT: break;
+// CPP-DEFAULT-NEXT: }
+// CPP-DEFAULT-NEXT: case 5: {
+// CPP-DEFAULT-NEXT: int32_t [[VAL_5:v[0-9]+]] = func_a();
+// CPP-DEFAULT-NEXT: break;
+// CPP-DEFAULT-NEXT: }
+// CPP-DEFAULT-NEXT: default: {
+// CPP-DEFAULT-NEXT: float [[VAL_6:v[0-9]+]] = 4.200000000e+01f;
+// CPP-DEFAULT-NEXT: func2([[VAL_6]]);
+// CPP-DEFAULT-NEXT: break;
+// CPP-DEFAULT-NEXT: }
+// CPP-DEFAULT-NEXT: }
+// CPP-DEFAULT-NEXT: return;
+// CPP-DEFAULT-NEXT: }
+
+// CPP-DECLTOP: void inline_side_effects_into_switch(int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]], int32_t [[VAL_3:v[0-9]+]]) {
+// CPP-DECLTOP-NEXT: float [[VAL_6:v[0-9]+]];
+// CPP-DECLTOP-NEXT: int32_t [[VAL_4:v[0-9]+]];
+// CPP-DECLTOP-NEXT: int32_t [[VAL_5:v[0-9]+]];
+// CPP-DECLTOP-NEXT: switch (bar([[VAL_1]], [[VAL_2]]) + [[VAL_3]]) {
+// CPP-DECLTOP-NEXT: case 2: {
+// CPP-DECLTOP-NEXT: [[VAL_4]] = func_b();
+// CPP-DECLTOP-NEXT: break;
+// CPP-DECLTOP-NEXT: }
+// CPP-DECLTOP-NEXT: case 5: {
+// CPP-DECLTOP-NEXT: [[VAL_5]] = func_a();
+// CPP-DECLTOP-NEXT: break;
+// CPP-DECLTOP-NEXT: }
+// CPP-DECLTOP-NEXT: default: {
+// CPP-DECLTOP-NEXT: [[VAL_6]] = 4.200000000e+01f;
+// CPP-DECLTOP-NEXT: func2([[VAL_6]]);
+// CPP-DECLTOP-NEXT: break;
+// CPP-DECLTOP-NEXT: }
+// CPP-DECLTOP-NEXT: }
+// CPP-DECLTOP-NEXT: return;
+// CPP-DECLTOP-NEXT: }
+
+func.func @inline_side_effects_into_switch(%arg0: i32, %arg1: i32, %arg2: i32) {
+ %0 = emitc.expression %arg0, %arg1, %arg2 : (i32, i32, i32) -> i32 {
+ %a = emitc.call_opaque "bar" (%arg0, %arg1) : (i32, i32) -> (i32)
+ %b = emitc.add %a, %arg2 : (i32, i32) -> i32
+ emitc.yield %b : i32
+ }
+ emitc.switch %0 : i32
+ case 2 {
+ %1 = emitc.call_opaque "func_b" () : () -> i32
+ emitc.yield
+ }
+ case 5 {
+ %2 = emitc.call_opaque "func_a" () : () -> i32
+ emitc.yield
+ }
+ default {
+ %3 = "emitc.constant"(){value = 42.0 : f32} : () -> f32
+ emitc.call_opaque "func2" (%3) : (f32) -> ()
+ emitc.yield
+ }
+ return
+}
diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll
index e056e43..61376b8 100644
--- a/mlir/test/Target/LLVMIR/Import/debug-info.ll
+++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll
@@ -240,11 +240,10 @@ define void @subprogram() !dbg !3 {
define void @func_loc() !dbg !3 {
ret void
}
-; CHECK-DAG: #[[NAME_LOC:.+]] = loc("func_loc")
; CHECK-DAG: #[[FILE_LOC:.+]] = loc("debug-info.ll":42:0)
; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram<id = distinct[{{.*}}]<>, compileUnit = #{{.*}}, scope = #{{.*}}, name = "func_loc", file = #{{.*}}, line = 42, subprogramFlags = Definition>
-; CHECK: loc(fused<#[[SP]]>[#[[NAME_LOC]], #[[FILE_LOC]]]
+; CHECK: loc(fused<#[[SP]]>[#[[FILE_LOC]]]
!llvm.dbg.cu = !{!1}
!llvm.module.flags = !{!0}
diff --git a/mlir/test/Target/LLVMIR/Import/function-attributes.ll b/mlir/test/Target/LLVMIR/Import/function-attributes.ll
index cc3d799..00d09ba 100644
--- a/mlir/test/Target/LLVMIR/Import/function-attributes.ll
+++ b/mlir/test/Target/LLVMIR/Import/function-attributes.ll
@@ -393,6 +393,12 @@ declare void @alwaysinline_attribute() alwaysinline
// -----
+; CHECK-LABEL: @inlinehint_attribute
+; CHECK-SAME: attributes {inline_hint}
+declare void @inlinehint_attribute() inlinehint
+
+// -----
+
; CHECK-LABEL: @optnone_attribute
; CHECK-SAME: attributes {no_inline, optimize_none}
declare void @optnone_attribute() noinline optnone
diff --git a/mlir/test/Target/LLVMIR/amx.mlir b/mlir/test/Target/LLVMIR/amx.mlir
index abdf2fe..160a9ce 100644
--- a/mlir/test/Target/LLVMIR/amx.mlir
+++ b/mlir/test/Target/LLVMIR/amx.mlir
@@ -23,6 +23,19 @@ func.func @amx_tile_load_store(%base: memref<?x?xi8>, %out: memref<?x?xi8>,
return
}
+// CHECK-LABEL: define void @amx_tile_load_store_strided
+func.func @amx_tile_load_store_strided(%base: memref<?xi8>, %out: memref<?xi8>,
+ %idx: index, %stride: index)
+{
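+  // A rank-1 memref with an explicit stride operand exercises the strided tile load/store path.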
+ // CHECK: call x86_amx @llvm.x86.tileloadd64.internal
+ // CHECK: call void @llvm.x86.tilestored64.internal
+ %val = amx.tile_load %base[%idx], %stride
+ : memref<?xi8> into !amx.tile<16x64xi8>
+ amx.tile_store %out[%idx], %val, %stride
+ : memref<?xi8>, !amx.tile<16x64xi8>
+ return
+}
+
// CHECK-LABEL: define void @amx_tile_mulf_bf16
func.func @amx_tile_mulf_bf16(
%matA: memref<?x?xbf16>, %matB: memref<?x?xbf16>, %idx: index,
diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir
index 69814f2..cc243c8 100644
--- a/mlir/test/Target/LLVMIR/llvmir.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir.mlir
@@ -2555,6 +2555,17 @@ llvm.func @always_inline() attributes { always_inline } {
// -----
+// CHECK-LABEL: @inline_hint
+// CHECK-SAME: #[[ATTRS:[0-9]+]]
+llvm.func @inline_hint() attributes { inline_hint } {
+ llvm.return
+}
+
+// CHECK: #[[ATTRS]]
+// CHECK-SAME: inlinehint
+
+// -----
+
// CHECK-LABEL: @optimize_none
// CHECK-SAME: #[[ATTRS:[0-9]+]]
llvm.func @optimize_none() attributes { no_inline, optimize_none } {
diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir
new file mode 100644
index 0000000..04e2ddf
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir
@@ -0,0 +1,12 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-LABEL: @convert_f32x2_to_f4x2_e2m1
+llvm.func @convert_f32x2_to_f4x2_e2m1(%srcA : f32, %srcB : f32) {
+ // CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.satfinite(float %{{.*}}, float %{{.*}})
+ // CHECK-NEXT: %{{.*}} = trunc i16 %[[res1]] to i8
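+  // Two E2M1 values occupy 8 bits, so the i16 returned by the intrinsic is truncated to i8.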
+ %res1 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB : i8 (f4E2M1FN)
+ // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}})
+ // CHECK-NEXT: %{{.*}} = trunc i16 %[[res2]] to i8
+ %res2 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB {relu = true} : i8 (f4E2M1FN)
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index 0b36154..6cccfe4 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -254,6 +254,14 @@ llvm.func @nvvm_cvt_f32x2_to_f6x2_invalid_type(%a : f32, %b : f32) {
// -----
+llvm.func @nvvm_cvt_f32x2_to_f4x2_invalid_type(%a : f32, %b : f32) {
+ // expected-error @below {{Only 'f4E2M1FN' type is supported for conversions from f32x2 to f4x2.}}
+ %res = nvvm.convert.f32x2.to.f4x2 %a, %b : i8 (f8E4M3FN)
+ llvm.return
+}
+
+// -----
+
llvm.func @nvvm_prefetch_L1_with_evict_priority(%global_ptr: !llvm.ptr<1>) {
// expected-error @below {{cache eviction priority supported only for cache level L2}}
nvvm.prefetch level = L1, evict_priority = evict_last, %global_ptr : !llvm.ptr<1>
@@ -559,3 +567,25 @@ llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_invalid_return_typ
%res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i1
llvm.return
}
+
+// -----
+
+// Test that invalid row/col layouts for matrices A and B are rejected
+llvm.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, %b0 : i32, %c0 : i32, %c1 : i32, %c2 : i32, %c3 : i32) -> !llvm.struct<(i32,i32,i32,i32)> {
+ // expected-error@+1 {{Only m8n8k4 with f16 supports other layouts.}}
+ %0 = nvvm.mma.sync A[%a0, %a1] B[%b0] C[%c0, %c1, %c2, %c3]
+ {layoutA = #nvvm.mma_layout<col>, layoutB = #nvvm.mma_layout<col>,
+ multiplicandAPtxType = #nvvm.mma_type<s4>, multiplicandBPtxType = #nvvm.mma_type<s4>,
+ intOverflowBehavior=#nvvm.mma_int_overflow<satfinite>,
+ shape = #nvvm.shape<m = 16, n = 8, k = 32>} : (i32, i32, i32) -> !llvm.struct<(i32,i32,i32,i32)>
+ llvm.return %0 : !llvm.struct<(i32,i32,i32,i32)>
+}
+
+// -----
+
+// Test for range validation: an invalid range where lower == upper but not at the extremes
+func.func @invalid_range_equal_bounds() {
+ // expected-error @below {{invalid range attribute: Lower == Upper, but they aren't min (0) or max (4294967295) value! This is an invalid constant range.}}
+ %0 = nvvm.read.ptx.sreg.warpsize range <i32, 32, 32> : i32
+ return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 00a479d..594ae48 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -152,6 +152,10 @@ llvm.func @nvvm_special_regs() -> i32 {
%74 = nvvm.read.ptx.sreg.lanemask.ge : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt
%75 = nvvm.read.ptx.sreg.lanemask.gt : i32
+ // CHECK: %76 = call range(i32 0, 0) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %76 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 0> : i32
+ // CHECK: %77 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %77 = nvvm.read.ptx.sreg.tid.x range <i32, 4294967295, 4294967295> : i32
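+  // Equal bounds are accepted only at the extremes: <0, 0> keeps its range attribute,
+  // while the max/max case above is expected to drop it.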
llvm.return %1 : i32
}
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index fdd2c91..5f7d488 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -276,6 +276,20 @@ llvm.func @rocdl.s.wait.expcnt() {
llvm.return
}
+llvm.func @rocdl.s.wait.asynccnt() {
+ // CHECK-LABEL: rocdl.s.wait.asynccnt
+ // CHECK-NEXT: call void @llvm.amdgcn.s.wait.asynccnt(i16 0)
+ rocdl.s.wait.asynccnt 0
+ llvm.return
+}
+
+llvm.func @rocdl.s.wait.tensorcnt() {
+ // CHECK-LABEL: rocdl.s.wait.tensorcnt
+ // CHECK-NEXT: call void @llvm.amdgcn.s.wait.tensorcnt(i16 0)
+ rocdl.s.wait.tensorcnt 0
+ llvm.return
+}
+
llvm.func @rocdl.setprio() {
// CHECK: call void @llvm.amdgcn.s.setprio(i16 0)
rocdl.s.setprio 0
@@ -858,9 +872,11 @@ llvm.func @rocdl.mfma.scale.f32.16x16x128.f8f6f4(%arg0 : i32,
}
llvm.func @rocdl.wmma(%arg0 : vector<8xf32>, %arg1 : vector<16 x f16>, %arg2 : vector<16 x i16>, %arg3 : vector<8 x i32>,
- %arg4 : vector<2xi32>, %arg5 : vector<4xi32>, %arg6 : vector<4xf32>, %arg7 : vector<8xf16>, %arg8 : vector<8xi16>) -> vector<8xf32> {
+ %arg4 : vector<2xi32>, %arg5 : vector<4xi32>, %arg6 : vector<4xf32>, %arg7 : vector<8xf16>, %arg8 : vector<8xi16>,
+ %arg9 : vector<32xf16>, %arg10 : vector<16xf32>, %arg11 : vector<4xf32>, %arg12 : vector<32xf32>, %arg13 : vector<64xf32>,
+ %arg14 : vector<64xi32>, %arg15 : vector<64xf16>, %arg16 : vector<16xbf16>, %arg17 : vector<32xbf16>) -> vector<8xf32> {
%zero = llvm.mlir.constant(false) : i1
-
+ %zero_i16 = llvm.mlir.constant(0 : i16) : i16
// ---- Wave32 -----
// f16 -> f32
@@ -891,6 +907,83 @@ llvm.func @rocdl.wmma(%arg0 : vector<8xf32>, %arg1 : vector<16 x f16>, %arg2 : v
// CHECK: call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 {{.*}}, <2 x i32> %{{.*}}, i1 {{.*}}, <2 x i32> %{{.*}}, <8 x i32> %{{.*}}, i1 {{.*}})
%r6.gfx12 = rocdl.wmma.i32.16x16x32.iu4 %zero, %arg4, %zero, %arg4, %arg3, %zero : (i1, vector<2xi32>, i1, vector<2xi32>, vector<8xi32>, i1) -> vector<8xi32>
+ // f32 -> f32
+ // CHECK: call <4 x float> @llvm.amdgcn.wmma.f32.16x16x4.f32.v4f32.v16f32(i1 {{.*}}, <16 x float> %{{.*}}, i1 {{.*}}, <16 x float> %{{.*}}, i16 0, <4 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r1.gfx1250 = rocdl.wmma.f32.16x16x4.f32 %zero, %arg10, %zero, %arg10, %zero_i16, %arg11, %zero, %zero : (i1, vector<16xf32>, i1, vector<16xf32>, i16, vector<4xf32>, i1, i1) -> vector<4xf32>
+
+ // f16 -> f32
+ // CHECK: call <32 x float> @llvm.amdgcn.wmma.f32.16x16x32.f16.v32f32.v16f16(i1 {{.*}}, <16 x half> %{{.*}}, i1 {{.*}}, <16 x half> %{{.*}}, i16 0, <32 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r2.gfx1250 = rocdl.wmma.f32.16x16x32.f16 %zero, %arg1, %zero, %arg1, %zero_i16, %arg12, %zero, %zero : (i1, vector<16xf16>, i1, vector<16xf16>, i16, vector<32xf32>, i1, i1) -> vector<32xf32>
+
+ // bf16 -> f32
+ // CHECK: call <32 x float> @llvm.amdgcn.wmma.f32.16x16x32.bf16.v32f32.v16bf16(i1 {{.*}}, <16 x bfloat> %{{.*}}, i1 {{.*}}, <16 x bfloat> %{{.*}}, i16 0, <32 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r3.gfx1250 = rocdl.wmma.f32.16x16x32.bf16 %zero, %arg16, %zero, %arg16, %zero_i16, %arg12, %zero, %zero : (i1, vector<16xbf16>, i1, vector<16xbf16>, i16, vector<32xf32>, i1, i1) -> vector<32xf32>
+
+ // f16 -> f16
+ // CHECK: call <32 x half> @llvm.amdgcn.wmma.f16.16x16x32.f16.v32f16.v16f16(i1 {{.*}}, <16 x half> %{{.*}}, i1 {{.*}}, <16 x half> %{{.*}}, i16 0, <32 x half> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r4.gfx1250 = rocdl.wmma.f16.16x16x32.f16 %zero, %arg1, %zero, %arg1, %zero_i16, %arg9, %zero, %zero : (i1, vector<16xf16>, i1, vector<16xf16>, i16, vector<32xf16>, i1, i1) -> vector<32xf16>
+
+ // bf16 -> bf16
+ // CHECK: call <32 x bfloat> @llvm.amdgcn.wmma.bf16.16x16x32.bf16.v32bf16.v16bf16(i1 {{.*}}, <16 x bfloat> %{{.*}}, i1 {{.*}}, <16 x bfloat> %{{.*}}, i16 0, <32 x bfloat> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r5.gfx1250 = rocdl.wmma.bf16.16x16x32.bf16 %zero, %arg16, %zero, %arg16, %zero_i16, %arg17, %zero, %zero : (i1, vector<16xbf16>, i1, vector<16xbf16>, i16, vector<32xbf16>, i1, i1) -> vector<32xbf16>
+
+ // bf16 -> bf16 / f32
+ // CHECK: call <32 x bfloat> @llvm.amdgcn.wmma.bf16f32.16x16x32.bf16.v32bf16.v16bf16.v32f32(i1 {{.*}}, <16 x bfloat> %{{.*}}, i1 {{.*}}, <16 x bfloat> %{{.*}}, i16 0, <32 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r6.gfx1250 = rocdl.wmma.bf16f32.16x16x32.bf16 %zero, %arg16, %zero, %arg16, %zero_i16, %arg12, %zero, %zero : (i1, vector<16xbf16>, i1, vector<16xbf16>, i16, vector<32xf32>, i1, i1) -> vector<32xbf16>
+
+ // f8/bf8 -> f16/f32
+ // CHECK: call <64 x float> @llvm.amdgcn.wmma.f32.16x16x64.fp8.fp8.v64f32.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r7.gfx1250 = rocdl.wmma.f32.16x16x64.fp8_fp8 %arg5, %arg5, %zero_i16, %arg13, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf32>, i1, i1) -> vector<64xf32>
+
+ // CHECK: call <64 x float> @llvm.amdgcn.wmma.f32.16x16x64.fp8.bf8.v64f32.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r8.gfx1250 = rocdl.wmma.f32.16x16x64.fp8_bf8 %arg5, %arg5, %zero_i16, %arg13, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf32>, i1, i1) -> vector<64xf32>
+
+ // CHECK: call <64 x float> @llvm.amdgcn.wmma.f32.16x16x64.bf8.fp8.v64f32.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r9.gfx1250 = rocdl.wmma.f32.16x16x64.bf8_fp8 %arg5, %arg5, %zero_i16, %arg13, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf32>, i1, i1) -> vector<64xf32>
+
+ // CHECK: call <64 x float> @llvm.amdgcn.wmma.f32.16x16x64.bf8.bf8.v64f32.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r10.gfx1250 = rocdl.wmma.f32.16x16x64.bf8_bf8 %arg5, %arg5, %zero_i16, %arg13, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf32>, i1, i1) -> vector<64xf32>
+
+ // CHECK: call <64 x half> @llvm.amdgcn.wmma.f16.16x16x64.fp8.fp8.v64f16.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x half> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r11.gfx1250 = rocdl.wmma.f16.16x16x64.fp8_fp8 %arg5, %arg5, %zero_i16, %arg15, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf16>, i1, i1) -> vector<64xf16>
+
+ // CHECK: call <64 x half> @llvm.amdgcn.wmma.f16.16x16x64.fp8.bf8.v64f16.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x half> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r12.gfx1250 = rocdl.wmma.f16.16x16x64.fp8_bf8 %arg5, %arg5, %zero_i16, %arg15, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf16>, i1, i1) -> vector<64xf16>
+
+ // CHECK: call <64 x half> @llvm.amdgcn.wmma.f16.16x16x64.bf8.fp8.v64f16.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x half> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r13.gfx1250 = rocdl.wmma.f16.16x16x64.bf8_fp8 %arg5, %arg5, %zero_i16, %arg15, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf16>, i1, i1) -> vector<64xf16>
+
+ // CHECK: call <64 x half> @llvm.amdgcn.wmma.f16.16x16x64.bf8.bf8.v64f16.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x half> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r14.gfx1250 = rocdl.wmma.f16.16x16x64.bf8_bf8 %arg5, %arg5, %zero_i16, %arg15, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf16>, i1, i1) -> vector<64xf16>
+
+ // CHECK: call <64 x float> @llvm.amdgcn.wmma.f32.16x16x128.fp8.fp8.v64f32.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r15.gfx1250 = rocdl.wmma.f32.16x16x128.fp8_fp8 %arg5, %arg5, %zero_i16, %arg13, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf32>, i1, i1) -> vector<64xf32>
+
+ // CHECK: call <64 x float> @llvm.amdgcn.wmma.f32.16x16x128.fp8.bf8.v64f32.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r16.gfx1250 = rocdl.wmma.f32.16x16x128.fp8_bf8 %arg5, %arg5, %zero_i16, %arg13, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf32>, i1, i1) -> vector<64xf32>
+
+ // CHECK: call <64 x float> @llvm.amdgcn.wmma.f32.16x16x128.bf8.fp8.v64f32.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r17.gfx1250 = rocdl.wmma.f32.16x16x128.bf8_fp8 %arg5, %arg5, %zero_i16, %arg13, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf32>, i1, i1) -> vector<64xf32>
+
+ // CHECK: call <64 x float> @llvm.amdgcn.wmma.f32.16x16x128.bf8.bf8.v64f32.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x float> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r18.gfx1250 = rocdl.wmma.f32.16x16x128.bf8_bf8 %arg5, %arg5, %zero_i16, %arg13, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf32>, i1, i1) -> vector<64xf32>
+
+ // CHECK: call <64 x half> @llvm.amdgcn.wmma.f16.16x16x128.fp8.fp8.v64f16.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x half> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r19.gfx1250 = rocdl.wmma.f16.16x16x128.fp8_fp8 %arg5, %arg5, %zero_i16, %arg15, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf16>, i1, i1) -> vector<64xf16>
+
+ // CHECK: call <64 x half> @llvm.amdgcn.wmma.f16.16x16x128.fp8.bf8.v64f16.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x half> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r20.gfx1250 = rocdl.wmma.f16.16x16x128.fp8_bf8 %arg5, %arg5, %zero_i16, %arg15, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf16>, i1, i1) -> vector<64xf16>
+
+ // CHECK: call <64 x half> @llvm.amdgcn.wmma.f16.16x16x128.bf8.fp8.v64f16.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x half> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r21.gfx1250 = rocdl.wmma.f16.16x16x128.bf8_fp8 %arg5, %arg5, %zero_i16, %arg15, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf16>, i1, i1) -> vector<64xf16>
+
+ // CHECK: call <64 x half> @llvm.amdgcn.wmma.f16.16x16x128.bf8.bf8.v64f16.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i16 0, <64 x half> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r22.gfx1250 = rocdl.wmma.f16.16x16x128.bf8_bf8 %arg5, %arg5, %zero_i16, %arg15, %zero, %zero : (vector<4xi32>, vector<4xi32>, i16, vector<64xf16>, i1, i1) -> vector<64xf16>
+
+ // iu8 -> i32
+ // CHECK: call <64 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v64i32.v4i32(i1 {{.*}}, <4 x i32> %{{.*}}, i1 {{.*}}, <4 x i32> %{{.*}}, <64 x i32> %{{.*}}, i1 {{.*}}, i1 {{.*}})
+ %r23.gfx1250 = rocdl.wmma.i32.16x16x64.iu8 %zero, %arg5, %zero, %arg5, %arg14, %zero, %zero : (i1, vector<4xi32>, i1, vector<4xi32>, vector<64xi32>, i1, i1) -> vector<64xi32>
+
// ---- Wave64 -----
// f16 -> f32
diff --git a/mlir/test/Target/Wasm/abs.mlir b/mlir/test/Target/Wasm/abs.mlir
index 9c45ba7..fe3602a 100644
--- a/mlir/test/Target/Wasm/abs.mlir
+++ b/mlir/test/Target/Wasm/abs.mlir
@@ -12,12 +12,12 @@
)
*/
-// CHECK-LABEL: wasmssa.func @abs_f32() -> f32 {
+// CHECK-LABEL: wasmssa.func exported @abs_f32() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f32
// CHECK: %[[VAL_1:.*]] = wasmssa.abs %[[VAL_0]] : f32
// CHECK: wasmssa.return %[[VAL_1]] : f32
-// CHECK-LABEL: wasmssa.func @abs_f64() -> f64 {
+// CHECK-LABEL: wasmssa.func exported @abs_f64() -> f64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f64
// CHECK: %[[VAL_1:.*]] = wasmssa.abs %[[VAL_0]] : f64
// CHECK: wasmssa.return %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/add_div.mlir b/mlir/test/Target/Wasm/add_div.mlir
new file mode 100644
index 0000000..8a87c60
--- /dev/null
+++ b/mlir/test/Target/Wasm/add_div.mlir
@@ -0,0 +1,40 @@
+// RUN: yaml2obj %S/inputs/add_div.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+ (module $test.wasm
+ (type (;0;) (func (param i32) (result i32)))
+ (type (;1;) (func (param i32 i32) (result i32)))
+ (import "env" "twoTimes" (func $twoTimes (type 0)))
+ (func $add (type 1) (param i32 i32) (result i32)
+ local.get 0
+ call $twoTimes
+ local.get 1
+ call $twoTimes
+ i32.add
+ i32.const 2
+ i32.div_s)
+ (memory (;0;) 2)
+ (global $__stack_pointer (mut i32) (i32.const 66560))
+ (export "memory" (memory 0))
+ (export "add" (func $add)))
+*/
+
+// CHECK-LABEL: wasmssa.import_func "twoTimes" from "env" as @func_0 {type = (i32) -> i32}
+
+// CHECK-LABEL: wasmssa.func exported @add(
+// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>,
+// CHECK-SAME: %[[ARG1:.*]]: !wasmssa<local ref to i32>) -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.call @func_0(%[[VAL_0]]) : (i32) -> i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.local_get %[[ARG1]] : ref to i32
+// CHECK: %[[VAL_3:.*]] = wasmssa.call @func_0(%[[VAL_2]]) : (i32) -> i32
+// CHECK: %[[VAL_4:.*]] = wasmssa.add %[[VAL_1]] %[[VAL_3]] : i32
+// CHECK: %[[VAL_5:.*]] = wasmssa.const 2 : i32
+// CHECK: %[[VAL_6:.*]] = wasmssa.div_si %[[VAL_4]] %[[VAL_5]] : i32
+// CHECK: wasmssa.return %[[VAL_6]] : i32
+// CHECK: }
+// CHECK: wasmssa.memory exported @memory !wasmssa<limit[2:]>
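+// The source's "(memory 2)" declares a two-page minimum with no maximum, hence the open-ended limit[2:].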
+
+// CHECK-LABEL: wasmssa.global @global_0 i32 mutable : {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 66560 : i32
+// CHECK: wasmssa.return %[[VAL_0]] : i32
diff --git a/mlir/test/Target/Wasm/and.mlir b/mlir/test/Target/Wasm/and.mlir
index 4c0fea0..323d41a 100644
--- a/mlir/test/Target/Wasm/and.mlir
+++ b/mlir/test/Target/Wasm/and.mlir
@@ -14,13 +14,13 @@
)
*/
-// CHECK-LABEL: wasmssa.func @and_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @and_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.and %0 %1 : i32
// CHECK: wasmssa.return %2 : i32
-// CHECK-LABEL: wasmssa.func @and_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @and_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.and %0 %1 : i64
diff --git a/mlir/test/Target/Wasm/block.mlir b/mlir/test/Target/Wasm/block.mlir
new file mode 100644
index 0000000..c85fc1e
--- /dev/null
+++ b/mlir/test/Target/Wasm/block.mlir
@@ -0,0 +1,16 @@
+// RUN: yaml2obj %S/inputs/block.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+(func(export "i_am_a_block")
+(block $i_am_a_block)
+)
+)
+*/
+
+// CHECK-LABEL: wasmssa.func exported @i_am_a_block() {
+// CHECK: wasmssa.block : {
+// CHECK: wasmssa.block_return
+// CHECK: }> ^bb1
+// CHECK: ^bb1:
+// CHECK: wasmssa.return
diff --git a/mlir/test/Target/Wasm/block_complete_type.mlir b/mlir/test/Target/Wasm/block_complete_type.mlir
new file mode 100644
index 0000000..67df198
--- /dev/null
+++ b/mlir/test/Target/Wasm/block_complete_type.mlir
@@ -0,0 +1,24 @@
+// RUN: yaml2obj %S/inputs/block_complete_type.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+ (type (;0;) (func (param i32) (result i32)))
+ (type (;1;) (func (result i32)))
+ (func (;0;) (type 1) (result i32)
+ i32.const 14
+ block (param i32) (result i32) ;; label = @1
+ i32.const 1
+ i32.add
+ end))
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 14 : i32
+// CHECK: wasmssa.block(%[[VAL_0]]) : i32 : {
+// CHECK: ^bb0(%[[VAL_1:.*]]: i32):
+// CHECK: %[[VAL_2:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_3:.*]] = wasmssa.add %[[VAL_1]] %[[VAL_2]] : i32
+// CHECK: wasmssa.block_return %[[VAL_3]] : i32
+// CHECK: }> ^bb1
+// CHECK: ^bb1(%[[VAL_4:.*]]: i32):
+// CHECK: wasmssa.return %[[VAL_4]] : i32
diff --git a/mlir/test/Target/Wasm/block_value_type.mlir b/mlir/test/Target/Wasm/block_value_type.mlir
new file mode 100644
index 0000000..fa30f08
--- /dev/null
+++ b/mlir/test/Target/Wasm/block_value_type.mlir
@@ -0,0 +1,19 @@
+// RUN: yaml2obj %S/inputs/block_value_type.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+ (type (;0;) (func (result i32)))
+ (func (;0;) (type 0) (result i32)
+ block (result i32) ;; label = @1
+ i32.const 17
+ end))
+*/
+
+
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
+// CHECK: wasmssa.block : {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 17 : i32
+// CHECK: wasmssa.block_return %[[VAL_0]] : i32
+// CHECK: }> ^bb1
+// CHECK: ^bb1(%[[VAL_1:.*]]: i32):
+// CHECK: wasmssa.return %[[VAL_1]] : i32
diff --git a/mlir/test/Target/Wasm/branch_if.mlir b/mlir/test/Target/Wasm/branch_if.mlir
new file mode 100644
index 0000000..c91ff37
--- /dev/null
+++ b/mlir/test/Target/Wasm/branch_if.mlir
@@ -0,0 +1,29 @@
+// RUN: yaml2obj %S/inputs/branch_if.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+ (type $produce_i32 (func (result i32)))
+ (func (type $produce_i32)
+ (block $my_block (type $produce_i32)
+ i32.const 1
+ i32.const 2
+ br_if $my_block
+ i32.const 1
+ i32.add
+ )
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
+// CHECK: wasmssa.block : {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 2 : i32
+// CHECK: wasmssa.branch_if %[[VAL_1]] to level 0 with args(%[[VAL_0]] : i32) else ^bb1
+// CHECK: ^bb1:
+// CHECK: %[[VAL_2:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_3:.*]] = wasmssa.add %[[VAL_0]] %[[VAL_2]] : i32
+// CHECK: wasmssa.block_return %[[VAL_3]] : i32
+// CHECK: }> ^bb1
+// CHECK: ^bb1(%[[VAL_4:.*]]: i32):
+// CHECK: wasmssa.return %[[VAL_4]] : i32
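+// "to level 0" targets the innermost enclosing construct, i.e. $my_block in the source.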
diff --git a/mlir/test/Target/Wasm/call.mlir b/mlir/test/Target/Wasm/call.mlir
new file mode 100644
index 0000000..c0169aa
--- /dev/null
+++ b/mlir/test/Target/Wasm/call.mlir
@@ -0,0 +1,17 @@
+// RUN: yaml2obj %S/inputs/call.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+(func $forty_two (result i32)
+  i32.const 42)
+(func (export "forty_two") (result i32)
+  call $forty_two))
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 42 : i32
+// CHECK: wasmssa.return %[[VAL_0]] : i32
+
+// CHECK-LABEL: wasmssa.func exported @forty_two() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.call @func_0 : () -> i32
+// CHECK: wasmssa.return %[[VAL_0]] : i32
diff --git a/mlir/test/Target/Wasm/clz.mlir b/mlir/test/Target/Wasm/clz.mlir
index 3e6641d..858c09d 100644
--- a/mlir/test/Target/Wasm/clz.mlir
+++ b/mlir/test/Target/Wasm/clz.mlir
@@ -14,12 +14,12 @@
)
*/
-// CHECK-LABEL: wasmssa.func @clz_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @clz_i32() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: %[[VAL_1:.*]] = wasmssa.clz %[[VAL_0]] : i32
// CHECK: wasmssa.return %[[VAL_1]] : i32
-// CHECK-LABEL: wasmssa.func @clz_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @clz_i64() -> i64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
// CHECK: %[[VAL_1:.*]] = wasmssa.clz %[[VAL_0]] : i64
// CHECK: wasmssa.return %[[VAL_1]] : i64
diff --git a/mlir/test/Target/Wasm/comparison_ops.mlir b/mlir/test/Target/Wasm/comparison_ops.mlir
new file mode 100644
index 0000000..91e3a6a
--- /dev/null
+++ b/mlir/test/Target/Wasm/comparison_ops.mlir
@@ -0,0 +1,269 @@
+// RUN: yaml2obj %S/inputs/comparison_ops.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+/* Source code used to create this test:
+(module
+ (func $lt_si32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.lt_s
+ )
+ (func $le_si32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.le_s
+ )
+ (func $lt_ui32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.lt_u
+ )
+ (func $le_ui32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.le_u
+ )
+ (func $gt_si32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.gt_s
+ )
+ (func $gt_ui32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.gt_u
+ )
+ (func $ge_si32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.ge_s
+ )
+ (func $ge_ui32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.ge_u
+ )
+ (func $lt_si64 (result i32)
+ i64.const 12
+ i64.const 50
+ i64.lt_s
+ )
+ (func $le_si64 (result i32)
+ i64.const 12
+ i64.const 50
+ i64.le_s
+ )
+ (func $lt_ui64 (result i32)
+ i64.const 12
+ i64.const 50
+ i64.lt_u
+ )
+ (func $le_ui64 (result i32)
+ i64.const 12
+ i64.const 50
+ i64.le_u
+ )
+ (func $gt_si64 (result i32)
+ i64.const 12
+ i64.const 50
+ i64.gt_s
+ )
+ (func $gt_ui64 (result i32)
+ i64.const 12
+ i64.const 50
+ i64.gt_u
+ )
+ (func $ge_si64 (result i32)
+ i64.const 12
+ i64.const 50
+ i64.ge_s
+ )
+ (func $ge_ui64 (result i32)
+ i64.const 12
+ i64.const 50
+ i64.ge_u
+ )
+ (func $lt_f32 (result i32)
+ f32.const 5
+ f32.const 14
+ f32.lt
+ )
+ (func $le_f32 (result i32)
+ f32.const 5
+ f32.const 14
+ f32.le
+ )
+ (func $gt_f32 (result i32)
+ f32.const 5
+ f32.const 14
+ f32.gt
+ )
+ (func $ge_f32 (result i32)
+ f32.const 5
+ f32.const 14
+ f32.ge
+ )
+ (func $lt_f64 (result i32)
+ f64.const 5
+ f64.const 14
+ f64.lt
+ )
+ (func $le_f64 (result i32)
+ f64.const 5
+ f64.const 14
+ f64.le
+ )
+ (func $gt_f64 (result i32)
+ f64.const 5
+ f64.const 14
+ f64.gt
+ )
+ (func $ge_f64 (result i32)
+ f64.const 5
+ f64.const 14
+ f64.ge
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.lt_si %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_1() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.le_si %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_2() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.lt_ui %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_3() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.le_ui %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_4() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.gt_si %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_5() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.gt_ui %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_6() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.ge_si %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_7() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.ge_ui %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_8() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.lt_si %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_9() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.le_si %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_10() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.lt_ui %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_11() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.le_ui %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_12() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.gt_si %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_13() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.gt_ui %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_14() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.ge_si %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_15() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.ge_ui %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_16() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f32
+// CHECK: %[[VAL_2:.*]] = wasmssa.lt %[[VAL_0]] %[[VAL_1]] : f32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_17() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f32
+// CHECK: %[[VAL_2:.*]] = wasmssa.le %[[VAL_0]] %[[VAL_1]] : f32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_18() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f32
+// CHECK: %[[VAL_2:.*]] = wasmssa.gt %[[VAL_0]] %[[VAL_1]] : f32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_19() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f32
+// CHECK: %[[VAL_2:.*]] = wasmssa.ge %[[VAL_0]] %[[VAL_1]] : f32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_20() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f64
+// CHECK: %[[VAL_2:.*]] = wasmssa.lt %[[VAL_0]] %[[VAL_1]] : f64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_21() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f64
+// CHECK: %[[VAL_2:.*]] = wasmssa.le %[[VAL_0]] %[[VAL_1]] : f64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_22() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f64
+// CHECK: %[[VAL_2:.*]] = wasmssa.gt %[[VAL_0]] %[[VAL_1]] : f64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_23() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f64
+// CHECK: %[[VAL_2:.*]] = wasmssa.ge %[[VAL_0]] %[[VAL_1]] : f64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
diff --git a/mlir/test/Target/Wasm/const.mlir b/mlir/test/Target/Wasm/const.mlir
index aa9e76f..adb792a 100644
--- a/mlir/test/Target/Wasm/const.mlir
+++ b/mlir/test/Target/Wasm/const.mlir
@@ -16,22 +16,22 @@
)
*/
-// CHECK-LABEL: wasmssa.func nested @func_0() -> i32 {
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1 : i32
// CHECK: wasmssa.return %[[VAL_0]] : i32
// CHECK: }
-// CHECK-LABEL: wasmssa.func nested @func_1() -> i64 {
+// CHECK-LABEL: wasmssa.func @func_1() -> i64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 3 : i64
// CHECK: wasmssa.return %[[VAL_0]] : i64
// CHECK: }
-// CHECK-LABEL: wasmssa.func nested @func_2() -> f32 {
+// CHECK-LABEL: wasmssa.func @func_2() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 4.000000e+00 : f32
// CHECK: wasmssa.return %[[VAL_0]] : f32
// CHECK: }
-// CHECK-LABEL: wasmssa.func nested @func_3() -> f64 {
+// CHECK-LABEL: wasmssa.func @func_3() -> f64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 9.000000e+00 : f64
// CHECK: wasmssa.return %[[VAL_0]] : f64
// CHECK: }
diff --git a/mlir/test/Target/Wasm/convert.mlir b/mlir/test/Target/Wasm/convert.mlir
new file mode 100644
index 0000000..ddc29a7
--- /dev/null
+++ b/mlir/test/Target/Wasm/convert.mlir
@@ -0,0 +1,85 @@
+// RUN: yaml2obj %S/inputs/convert.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to generate this test:
+(module
+ (func (export "convert_i32_u_to_f32") (result f32)
+ i32.const 10
+ f32.convert_i32_u
+ )
+
+ (func (export "convert_i32_s_to_f32") (result f32)
+ i32.const 42
+ f32.convert_i32_s
+ )
+
+ (func (export "convert_i64_u_to_f32") (result f32)
+ i64.const 17
+ f32.convert_i64_u
+ )
+
+ (func (export "convert_i64s_to_f32") (result f32)
+ i64.const 10
+ f32.convert_i64_s
+ )
+
+ (func (export "convert_i32_u_to_f64") (result f64)
+ i32.const 10
+ f64.convert_i32_u
+ )
+
+ (func (export "convert_i32_s_to_f64") (result f64)
+ i32.const 42
+ f64.convert_i32_s
+ )
+
+ (func (export "convert_i64_u_to_f64") (result f64)
+ i64.const 17
+ f64.convert_i64_u
+ )
+
+ (func (export "convert_i64s_to_f64") (result f64)
+ i64.const 10
+ f64.convert_i64_s
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func exported @convert_i32_u_to_f32() -> f32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.convert_u %[[VAL_0]] : i32 to f32
+// CHECK: wasmssa.return %[[VAL_1]] : f32
+
+// CHECK-LABEL: wasmssa.func exported @convert_i32_s_to_f32() -> f32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 42 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.convert_s %[[VAL_0]] : i32 to f32
+// CHECK: wasmssa.return %[[VAL_1]] : f32
+
+// CHECK-LABEL: wasmssa.func exported @convert_i64_u_to_f32() -> f32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 17 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.convert_u %[[VAL_0]] : i64 to f32
+// CHECK: wasmssa.return %[[VAL_1]] : f32
+
+// CHECK-LABEL: wasmssa.func exported @convert_i64s_to_f32() -> f32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.convert_s %[[VAL_0]] : i64 to f32
+// CHECK: wasmssa.return %[[VAL_1]] : f32
+
+// CHECK-LABEL: wasmssa.func exported @convert_i32_u_to_f64() -> f64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.convert_u %[[VAL_0]] : i32 to f64
+// CHECK: wasmssa.return %[[VAL_1]] : f64
+
+// CHECK-LABEL: wasmssa.func exported @convert_i32_s_to_f64() -> f64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 42 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.convert_s %[[VAL_0]] : i32 to f64
+// CHECK: wasmssa.return %[[VAL_1]] : f64
+
+// CHECK-LABEL: wasmssa.func exported @convert_i64_u_to_f64() -> f64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 17 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.convert_u %[[VAL_0]] : i64 to f64
+// CHECK: wasmssa.return %[[VAL_1]] : f64
+
+// CHECK-LABEL: wasmssa.func exported @convert_i64s_to_f64() -> f64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.convert_s %[[VAL_0]] : i64 to f64
+// CHECK: wasmssa.return %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/copysign.mlir b/mlir/test/Target/Wasm/copysign.mlir
index 33d7a56..90c5b11 100644
--- a/mlir/test/Target/Wasm/copysign.mlir
+++ b/mlir/test/Target/Wasm/copysign.mlir
@@ -16,14 +16,14 @@
)
*/
-// CHECK-LABEL: wasmssa.func @copysign_f32() -> f32 {
+// CHECK-LABEL: wasmssa.func exported @copysign_f32() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.000000e+00 : f32
// CHECK: %[[VAL_2:.*]] = wasmssa.copysign %[[VAL_0]] %[[VAL_1]] : f32
// CHECK: wasmssa.return %[[VAL_2]] : f32
// CHECK: }
-// CHECK-LABEL: wasmssa.func @copysign_f64() -> f64 {
+// CHECK-LABEL: wasmssa.func exported @copysign_f64() -> f64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.000000e+00 : f64
// CHECK: %[[VAL_2:.*]] = wasmssa.copysign %[[VAL_0]] %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/ctz.mlir b/mlir/test/Target/Wasm/ctz.mlir
index 6c0806f..9e7cc5e 100644
--- a/mlir/test/Target/Wasm/ctz.mlir
+++ b/mlir/test/Target/Wasm/ctz.mlir
@@ -14,12 +14,12 @@
)
*/
-// CHECK-LABEL: wasmssa.func @ctz_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @ctz_i32() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: %[[VAL_1:.*]] = wasmssa.ctz %[[VAL_0]] : i32
// CHECK: wasmssa.return %[[VAL_1]] : i32
-// CHECK-LABEL: wasmssa.func @ctz_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @ctz_i64() -> i64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
// CHECK: %[[VAL_1:.*]] = wasmssa.ctz %[[VAL_0]] : i64
// CHECK: wasmssa.return %[[VAL_1]] : i64
diff --git a/mlir/test/Target/Wasm/demote.mlir b/mlir/test/Target/Wasm/demote.mlir
new file mode 100644
index 0000000..3d2bc05
--- /dev/null
+++ b/mlir/test/Target/Wasm/demote.mlir
@@ -0,0 +1,15 @@
+// RUN: yaml2obj %S/inputs/demote.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+ (func $main (result f32)
+ f64.const 2.24
+ f32.demote_f64
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> f32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 2.240000e+00 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.demote %[[VAL_0]] : f64 to f32
+// CHECK: wasmssa.return %[[VAL_1]] : f32
diff --git a/mlir/test/Target/Wasm/div.mlir b/mlir/test/Target/Wasm/div.mlir
index c91f780..4967d96 100644
--- a/mlir/test/Target/Wasm/div.mlir
+++ b/mlir/test/Target/Wasm/div.mlir
@@ -66,61 +66,61 @@
)
*/
-// CHECK-LABEL: wasmssa.func @div_u_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @div_u_i32() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 2 : i32
// CHECK: %[[VAL_2:.*]] = wasmssa.div_ui %[[VAL_0]] %[[VAL_1]] : i32
// CHECK: wasmssa.return %[[VAL_2]] : i32
-// CHECK-LABEL: wasmssa.func @div_u_i32_zero() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @div_u_i32_zero() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 0 : i32
// CHECK: %[[VAL_2:.*]] = wasmssa.div_ui %[[VAL_0]] %[[VAL_1]] : i32
// CHECK: wasmssa.return %[[VAL_2]] : i32
-// CHECK-LABEL: wasmssa.func @div_s_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @div_s_i32() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 2 : i32
// CHECK: %[[VAL_2:.*]] = wasmssa.div_si %[[VAL_0]] %[[VAL_1]] : i32
// CHECK: wasmssa.return %[[VAL_2]] : i32
-// CHECK-LABEL: wasmssa.func @div_s_i32_zero() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @div_s_i32_zero() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 0 : i32
// CHECK: %[[VAL_2:.*]] = wasmssa.div_si %[[VAL_0]] %[[VAL_1]] : i32
// CHECK: wasmssa.return %[[VAL_2]] : i32
-// CHECK-LABEL: wasmssa.func @div_u_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @div_u_i64() -> i64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 2 : i64
// CHECK: %[[VAL_2:.*]] = wasmssa.div_ui %[[VAL_0]] %[[VAL_1]] : i64
// CHECK: wasmssa.return %[[VAL_2]] : i64
-// CHECK-LABEL: wasmssa.func @div_u_i64_zero() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @div_u_i64_zero() -> i64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 0 : i64
// CHECK: %[[VAL_2:.*]] = wasmssa.div_ui %[[VAL_0]] %[[VAL_1]] : i64
// CHECK: wasmssa.return %[[VAL_2]] : i64
-// CHECK-LABEL: wasmssa.func @div_s_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @div_s_i64() -> i64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 2 : i64
// CHECK: %[[VAL_2:.*]] = wasmssa.div_si %[[VAL_0]] %[[VAL_1]] : i64
// CHECK: wasmssa.return %[[VAL_2]] : i64
-// CHECK-LABEL: wasmssa.func @div_s_i64_zero() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @div_s_i64_zero() -> i64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 0 : i64
// CHECK: %[[VAL_2:.*]] = wasmssa.div_si %[[VAL_0]] %[[VAL_1]] : i64
// CHECK: wasmssa.return %[[VAL_2]] : i64
-// CHECK-LABEL: wasmssa.func @div_f32() -> f32 {
+// CHECK-LABEL: wasmssa.func exported @div_f32() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 2.000000e+00 : f32
// CHECK: %[[VAL_2:.*]] = wasmssa.div %[[VAL_0]] %[[VAL_1]] : f32
// CHECK: wasmssa.return %[[VAL_2]] : f32
-// CHECK-LABEL: wasmssa.func @div_f64() -> f64 {
+// CHECK-LABEL: wasmssa.func exported @div_f64() -> f64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 2.000000e+00 : f64
// CHECK: %[[VAL_2:.*]] = wasmssa.div %[[VAL_0]] %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/double_nested_loop.mlir b/mlir/test/Target/Wasm/double_nested_loop.mlir
new file mode 100644
index 0000000..8b3e499
--- /dev/null
+++ b/mlir/test/Target/Wasm/double_nested_loop.mlir
@@ -0,0 +1,63 @@
+// RUN: yaml2obj %S/inputs/double_nested_loop.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+ (func
+ ;; create two local variables, each initialized to 0
+ (local $i i32)
+ (local $j i32)
+
+ (loop $my_loop
+
+ ;; add one to $i
+ local.get $i
+ i32.const 1
+ i32.add
+ local.set $i
+ (loop $my_second_loop (result i32)
+ i32.const 1
+ local.get $j
+ i32.const 12
+ i32.add
+ local.tee $j
+ local.get $i
+ i32.gt_s
+ br_if $my_second_loop
+ )
+ i32.const 10
+ i32.lt_s
+ br_if $my_loop
+ )
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() {
+// CHECK: %[[VAL_0:.*]] = wasmssa.local of type i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.local of type i32
+// CHECK: wasmssa.loop : {
+// CHECK: %[[VAL_2:.*]] = wasmssa.local_get %[[VAL_0]] : ref to i32
+// CHECK: %[[VAL_3:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_4:.*]] = wasmssa.add %[[VAL_2]] %[[VAL_3]] : i32
+// CHECK: wasmssa.local_set %[[VAL_0]] : ref to i32 to %[[VAL_4]] : i32
+// CHECK: wasmssa.loop : {
+// CHECK: %[[VAL_5:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_6:.*]] = wasmssa.local_get %[[VAL_1]] : ref to i32
+// CHECK: %[[VAL_7:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_8:.*]] = wasmssa.add %[[VAL_6]] %[[VAL_7]] : i32
+// CHECK: %[[VAL_9:.*]] = wasmssa.local_tee %[[VAL_1]] : ref to i32 to %[[VAL_8]] : i32
+// CHECK: %[[VAL_10:.*]] = wasmssa.local_get %[[VAL_0]] : ref to i32
+// CHECK: %[[VAL_11:.*]] = wasmssa.gt_si %[[VAL_9]] %[[VAL_10]] : i32 -> i32
+// CHECK: wasmssa.branch_if %[[VAL_11]] to level 0 else ^bb1
+// CHECK: ^bb1:
+// CHECK: wasmssa.block_return %[[VAL_5]] : i32
+// CHECK: }> ^bb1
+// CHECK: ^bb1(%[[VAL_12:.*]]: i32):
+// CHECK: %[[VAL_13:.*]] = wasmssa.const 10 : i32
+// CHECK: %[[VAL_14:.*]] = wasmssa.lt_si %[[VAL_12]] %[[VAL_13]] : i32 -> i32
+// CHECK: wasmssa.branch_if %[[VAL_14]] to level 0 else ^bb2
+// CHECK: ^bb2:
+// CHECK: wasmssa.block_return
+// CHECK: }> ^bb1
+// CHECK: ^bb1:
+// CHECK: wasmssa.return
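+// Note that a branch to level 0 inside a wasmssa.loop re-enters the loop header, mirroring
+// wasm's semantics for br_if on a loop label.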
diff --git a/mlir/test/Target/Wasm/empty_blocks_list_and_stack.mlir b/mlir/test/Target/Wasm/empty_blocks_list_and_stack.mlir
new file mode 100644
index 0000000..5c98f1a
--- /dev/null
+++ b/mlir/test/Target/Wasm/empty_blocks_list_and_stack.mlir
@@ -0,0 +1,53 @@
+// RUN: yaml2obj %S/inputs/empty_blocks_list_and_stack.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+ (func (param $num i32)
+ (block $b1
+ (block $b2
+ (block $b3
+ )
+ )
+ )
+ )
+
+ (func (param $num i32)
+ (block $b1)
+ (block $b2)
+ (block $b3)
+ )
+)
+
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0(
+// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>) {
+// CHECK: wasmssa.block : {
+// CHECK: wasmssa.block : {
+// CHECK: wasmssa.block : {
+// CHECK: wasmssa.block_return
+// CHECK: }> ^bb1
+// CHECK: ^bb1:
+// CHECK: wasmssa.block_return
+// CHECK: }> ^bb1
+// CHECK: ^bb1:
+// CHECK: wasmssa.block_return
+// CHECK: }> ^bb1
+// CHECK: ^bb1:
+// CHECK: wasmssa.return
+
+// CHECK-LABEL: wasmssa.func @func_1(
+// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>) {
+// CHECK: wasmssa.block : {
+// CHECK: wasmssa.block_return
+// CHECK: }> ^bb1
+// CHECK: ^bb1:
+// CHECK: wasmssa.block : {
+// CHECK: wasmssa.block_return
+// CHECK: }> ^bb2
+// CHECK: ^bb2:
+// CHECK: wasmssa.block : {
+// CHECK: wasmssa.block_return
+// CHECK: }> ^bb3
+// CHECK: ^bb3:
+// CHECK: wasmssa.return
diff --git a/mlir/test/Target/Wasm/eq.mlir b/mlir/test/Target/Wasm/eq.mlir
new file mode 100644
index 0000000..ba3ae2f
--- /dev/null
+++ b/mlir/test/Target/Wasm/eq.mlir
@@ -0,0 +1,56 @@
+// RUN: yaml2obj %S/inputs/eq.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+/* Source code used to create this test:
+(module
+ (func $eq_i32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.eq
+ )
+
+ (func $eq_i64 (result i32)
+ i64.const 20
+ i64.const 5
+ i64.eq
+ )
+
+ (func $eq_f32 (result i32)
+ f32.const 5
+ f32.const 14
+ f32.eq
+ )
+
+ (func $eq_f64 (result i32)
+ f64.const 17
+ f64.const 0
+ f64.eq
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.eq %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+// CHECK: }
+
+// CHECK-LABEL: wasmssa.func @func_1() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 20 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 5 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.eq %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+// CHECK: }
+
+// CHECK-LABEL: wasmssa.func @func_2() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f32
+// CHECK: %[[VAL_2:.*]] = wasmssa.eq %[[VAL_0]] %[[VAL_1]] : f32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+// CHECK: }
+
+// CHECK-LABEL: wasmssa.func @func_3() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.700000e+01 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 0.000000e+00 : f64
+// CHECK: %[[VAL_2:.*]] = wasmssa.eq %[[VAL_0]] %[[VAL_1]] : f64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+// CHECK: }
diff --git a/mlir/test/Target/Wasm/eqz.mlir b/mlir/test/Target/Wasm/eqz.mlir
new file mode 100644
index 0000000..55cf94a
--- /dev/null
+++ b/mlir/test/Target/Wasm/eqz.mlir
@@ -0,0 +1,21 @@
+// RUN: yaml2obj %S/inputs/eqz.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+/* Source code used to create this test:
+(module
+ (func (export "eqz_i32") (result i32)
+ i32.const 13
+ i32.eqz)
+
+ (func (export "eqz_i64") (result i32)
+ i64.const 13
+ i64.eqz)
+)
+*/
+// CHECK-LABEL: wasmssa.func exported @eqz_i32() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 13 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.eqz %[[VAL_0]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_1]] : i32
+
+// CHECK-LABEL: wasmssa.func exported @eqz_i64() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 13 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.eqz %[[VAL_0]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_1]] : i32
diff --git a/mlir/test/Target/Wasm/extend.mlir b/mlir/test/Target/Wasm/extend.mlir
new file mode 100644
index 0000000..5d4446a
--- /dev/null
+++ b/mlir/test/Target/Wasm/extend.mlir
@@ -0,0 +1,69 @@
+// RUN: yaml2obj %S/inputs/extend.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+ (func $i32_s (result i64)
+ i32.const 10
+ i64.extend_i32_s
+ )
+ (func $i32_u (result i64)
+ i32.const 10
+ i64.extend_i32_u
+ )
+ (func $extend8_32 (result i32)
+ i32.const 10
+ i32.extend8_s
+ )
+ (func $extend16_32 (result i32)
+ i32.const 10
+ i32.extend16_s
+ )
+ (func $extend8_64 (result i64)
+ i64.const 10
+ i64.extend8_s
+ )
+ (func $extend16_64 (result i64)
+ i64.const 10
+ i64.extend16_s
+ )
+ (func $extend32_64 (result i64)
+ i64.const 10
+ i64.extend32_s
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> i64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.extend_i32_s %[[VAL_0]] to i64
+// CHECK: wasmssa.return %[[VAL_1]] : i64
+
+// CHECK-LABEL: wasmssa.func @func_1() -> i64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.extend_i32_u %[[VAL_0]] to i64
+// CHECK: wasmssa.return %[[VAL_1]] : i64
+
+// CHECK-LABEL: wasmssa.func @func_2() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.extend 8 : ui32 low bits from %[[VAL_0]] : i32
+// CHECK: wasmssa.return %[[VAL_1]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_3() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.extend 16 : ui32 low bits from %[[VAL_0]] : i32
+// CHECK: wasmssa.return %[[VAL_1]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_4() -> i64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.extend 8 : ui32 low bits from %[[VAL_0]] : i64
+// CHECK: wasmssa.return %[[VAL_1]] : i64
+
+// CHECK-LABEL: wasmssa.func @func_5() -> i64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.extend 16 : ui32 low bits from %[[VAL_0]] : i64
+// CHECK: wasmssa.return %[[VAL_1]] : i64
+
+// CHECK-LABEL: wasmssa.func @func_6() -> i64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.extend 32 : ui32 low bits from %[[VAL_0]] : i64
+// CHECK: wasmssa.return %[[VAL_1]] : i64
diff --git a/mlir/test/Target/Wasm/global.mlir b/mlir/test/Target/Wasm/global.mlir
index e72fe69..1e4fe44 100644
--- a/mlir/test/Target/Wasm/global.mlir
+++ b/mlir/test/Target/Wasm/global.mlir
@@ -29,9 +29,9 @@ i32.add
)
*/
-// CHECK-LABEL: wasmssa.import_global "from_js" from "env" as @global_0 nested : i32
+// CHECK-LABEL: wasmssa.import_global "from_js" from "env" as @global_0 : i32
-// CHECK-LABEL: wasmssa.func nested @func_0() -> i32 {
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.global_get @global_0 : i32
// CHECK: %[[VAL_1:.*]] = wasmssa.global_get @global_1 : i32
// CHECK: %[[VAL_2:.*]] = wasmssa.add %[[VAL_0]] %[[VAL_1]] : i32
@@ -41,26 +41,26 @@ i32.add
// CHECK: %[[VAL_6:.*]] = wasmssa.add %[[VAL_2]] %[[VAL_5]] : i32
// CHECK: wasmssa.return %[[VAL_6]] : i32
-// CHECK-LABEL: wasmssa.global @global_1 i32 nested : {
+// CHECK-LABEL: wasmssa.global @global_1 i32 : {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: wasmssa.return %[[VAL_0]] : i32
-// CHECK-LABEL: wasmssa.global @global_2 i32 mutable nested : {
+// CHECK-LABEL: wasmssa.global @global_2 i32 mutable : {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: wasmssa.return %[[VAL_0]] : i32
-// CHECK-LABEL: wasmssa.global @global_3 i32 mutable nested : {
+// CHECK-LABEL: wasmssa.global @global_3 i32 mutable : {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: wasmssa.return %[[VAL_0]] : i32
-// CHECK-LABEL: wasmssa.global @global_4 i64 nested : {
+// CHECK-LABEL: wasmssa.global @global_4 i64 : {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 11 : i64
// CHECK: wasmssa.return %[[VAL_0]] : i64
-// CHECK-LABEL: wasmssa.global @global_5 f32 nested : {
+// CHECK-LABEL: wasmssa.global @global_5 f32 : {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.200000e+01 : f32
// CHECK: wasmssa.return %[[VAL_0]] : f32
-// CHECK-LABEL: wasmssa.global @global_6 f64 nested : {
+// CHECK-LABEL: wasmssa.global @global_6 f64 : {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.300000e+01 : f64
// CHECK: wasmssa.return %[[VAL_0]] : f64
diff --git a/mlir/test/Target/Wasm/if.mlir b/mlir/test/Target/Wasm/if.mlir
new file mode 100644
index 0000000..2d7bfbe
--- /dev/null
+++ b/mlir/test/Target/Wasm/if.mlir
@@ -0,0 +1,112 @@
+// RUN: yaml2obj %S/inputs/if.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to create this test:
+(module
+(type $intMapper (func (param $input i32) (result i32)))
+(func $if_else (type $intMapper)
+ local.get 0
+ i32.const 1
+ i32.and
+ if $isOdd (result i32)
+ local.get 0
+ i32.const 3
+ i32.mul
+ i32.const 1
+ i32.add
+ else
+ local.get 0
+ i32.const 1
+ i32.shr_u
+ end
+)
+
+(func $if_only (type $intMapper)
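+  ;; the first local.get stays on the stack and feeds the if-block's
+  ;; i32 parameter: blocktype $intMapper is (param i32) (result i32)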
+ local.get 0
+ local.get 0
+ i32.const 1
+ i32.and
+ if $isOdd (type $intMapper)
+ i32.const 1
+ i32.add
+ end
+)
+
+(func $if_if (type $intMapper)
+ local.get 0
+ i32.ctz
+ if $isEven (result i32)
+ i32.const 2
+ local.get 0
+ i32.const 1
+ i32.shr_u
+ i32.ctz
+ if $isMultipleOfFour (type $intMapper)
+ i32.const 2
+ i32.add
+ end
+ else
+ i32.const 1
+ end
+)
+)
+*/
+// CHECK-LABEL: wasmssa.func @func_0(
+// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>) -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.and %[[VAL_0]] %[[VAL_1]] : i32
+// CHECK: wasmssa.if %[[VAL_2]] : {
+// CHECK: %[[VAL_3:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
+// CHECK: %[[VAL_4:.*]] = wasmssa.const 3 : i32
+// CHECK: %[[VAL_5:.*]] = wasmssa.mul %[[VAL_3]] %[[VAL_4]] : i32
+// CHECK: %[[VAL_6:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_7:.*]] = wasmssa.add %[[VAL_5]] %[[VAL_6]] : i32
+// CHECK: wasmssa.block_return %[[VAL_7]] : i32
+// CHECK: } "else "{
+// CHECK: %[[VAL_8:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
+// CHECK: %[[VAL_9:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_10:.*]] = wasmssa.shr_u %[[VAL_8]] by %[[VAL_9]] bits : i32
+// CHECK: wasmssa.block_return %[[VAL_10]] : i32
+// CHECK: }> ^bb1
+// CHECK: ^bb1(%[[VAL_11:.*]]: i32):
+// CHECK: wasmssa.return %[[VAL_11]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_1(
+// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>) -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_3:.*]] = wasmssa.and %[[VAL_1]] %[[VAL_2]] : i32
+// CHECK: wasmssa.if %[[VAL_3]](%[[VAL_0]]) : i32 : {
+// CHECK: ^bb0(%[[VAL_4:.*]]: i32):
+// CHECK: %[[VAL_5:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_6:.*]] = wasmssa.add %[[VAL_4]] %[[VAL_5]] : i32
+// CHECK: wasmssa.block_return %[[VAL_6]] : i32
+// CHECK: } > ^bb1
+// CHECK: ^bb1(%[[VAL_7:.*]]: i32):
+// CHECK: wasmssa.return %[[VAL_7]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_2(
+// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>) -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.ctz %[[VAL_0]] : i32
+// CHECK: wasmssa.if %[[VAL_1]] : {
+// CHECK: %[[VAL_2:.*]] = wasmssa.const 2 : i32
+// CHECK: %[[VAL_3:.*]] = wasmssa.local_get %[[ARG0]] : ref to i32
+// CHECK: %[[VAL_4:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_5:.*]] = wasmssa.shr_u %[[VAL_3]] by %[[VAL_4]] bits : i32
+// CHECK: %[[VAL_6:.*]] = wasmssa.ctz %[[VAL_5]] : i32
+// CHECK: wasmssa.if %[[VAL_6]](%[[VAL_2]]) : i32 : {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i32):
+// CHECK: %[[VAL_8:.*]] = wasmssa.const 2 : i32
+// CHECK: %[[VAL_9:.*]] = wasmssa.add %[[VAL_7]] %[[VAL_8]] : i32
+// CHECK: wasmssa.block_return %[[VAL_9]] : i32
+// CHECK: } > ^bb1
+// CHECK: ^bb1(%[[VAL_10:.*]]: i32):
+// CHECK: wasmssa.block_return %[[VAL_10]] : i32
+// CHECK: } "else "{
+// CHECK: %[[VAL_11:.*]] = wasmssa.const 1 : i32
+// CHECK: wasmssa.block_return %[[VAL_11]] : i32
+// CHECK: }> ^bb1
+// CHECK: ^bb1(%[[VAL_12:.*]]: i32):
+// CHECK: wasmssa.return %[[VAL_12]] : i32
diff --git a/mlir/test/Target/Wasm/import.mlir b/mlir/test/Target/Wasm/import.mlir
index 541dcf3..dcdfa52 100644
--- a/mlir/test/Target/Wasm/import.mlir
+++ b/mlir/test/Target/Wasm/import.mlir
@@ -11,9 +11,9 @@
)
*/
-// CHECK-LABEL: wasmssa.import_func "foo" from "my_module" as @func_0 {sym_visibility = "nested", type = (i32) -> ()}
-// CHECK: wasmssa.import_func "bar" from "my_module" as @func_1 {sym_visibility = "nested", type = (i32) -> ()}
-// CHECK: wasmssa.import_table "table" from "my_module" as @table_0 {sym_visibility = "nested", type = !wasmssa<tabletype !wasmssa.funcref [2:]>}
-// CHECK: wasmssa.import_mem "mem" from "my_module" as @mem_0 {limits = !wasmssa<limit[2:]>, sym_visibility = "nested"}
-// CHECK: wasmssa.import_global "glob" from "my_module" as @global_0 nested : i32
-// CHECK: wasmssa.import_global "glob_mut" from "my_other_module" as @global_1 mutable nested : i32
+// CHECK-LABEL: wasmssa.import_func "foo" from "my_module" as @func_0 {type = (i32) -> ()}
+// CHECK: wasmssa.import_func "bar" from "my_module" as @func_1 {type = (i32) -> ()}
+// CHECK: wasmssa.import_table "table" from "my_module" as @table_0 {type = !wasmssa<tabletype !wasmssa.funcref [2:]>}
+// CHECK: wasmssa.import_mem "mem" from "my_module" as @mem_0 {limits = !wasmssa<limit[2:]>}
+// CHECK: wasmssa.import_global "glob" from "my_module" as @global_0 : i32
+// CHECK: wasmssa.import_global "glob_mut" from "my_other_module" as @global_1 mutable : i32
diff --git a/mlir/test/Target/Wasm/inputs/add_div.yaml.wasm b/mlir/test/Target/Wasm/inputs/add_div.yaml.wasm
new file mode 100644
index 0000000..865c315
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/add_div.yaml.wasm
@@ -0,0 +1,50 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes:
+ - I32
+ ReturnTypes:
+ - I32
+ - Index: 1
+ ParamTypes:
+ - I32
+ - I32
+ ReturnTypes:
+ - I32
+ - Type: IMPORT
+ Imports:
+ - Module: env
+ Field: twoTimes
+ Kind: FUNCTION
+ SigIndex: 0
+ - Type: FUNCTION
+ FunctionTypes: [ 1 ]
+ - Type: MEMORY
+ Memories:
+ - Minimum: 0x2
+ - Type: GLOBAL
+ Globals:
+ - Index: 0
+ Type: I32
+ Mutable: true
+ InitExpr:
+ Opcode: I32_CONST
+ Value: 66560
+ - Type: EXPORT
+ Exports:
+ - Name: memory
+ Kind: MEMORY
+ Index: 0
+ - Name: add
+ Kind: FUNCTION
+ Index: 1
+ - Type: CODE
+ Functions:
+ - Index: 1
+ Locals: []
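+# The body decodes to (twoTimes(arg0) + twoTimes(arg1)) / 2:
+# local.get 0; call 0; local.get 1; call 0; i32.add; i32.const 2; i32.div_s; end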
+ Body: 20001000200110006A41026D0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/block.yaml.wasm b/mlir/test/Target/Wasm/inputs/block.yaml.wasm
new file mode 100644
index 0000000..dd5118a
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/block.yaml.wasm
@@ -0,0 +1,22 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes: []
+ - Type: FUNCTION
+ FunctionTypes: [ 0 ]
+ - Type: EXPORT
+ Exports:
+ - Name: i_am_a_block
+ Kind: FUNCTION
+ Index: 0
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 02400B0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/block_complete_type.yaml.wasm b/mlir/test/Target/Wasm/inputs/block_complete_type.yaml.wasm
new file mode 100644
index 0000000..7a125bf
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/block_complete_type.yaml.wasm
@@ -0,0 +1,23 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes:
+ - I32
+ ReturnTypes:
+ - I32
+ - Index: 1
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 1 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 410E020041016A0B0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/block_value_type.yaml.wasm b/mlir/test/Target/Wasm/inputs/block_value_type.yaml.wasm
new file mode 100644
index 0000000..4ba291d
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/block_value_type.yaml.wasm
@@ -0,0 +1,18 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 027F41110B0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/branch_if.yaml.wasm b/mlir/test/Target/Wasm/inputs/branch_if.yaml.wasm
new file mode 100644
index 0000000..40536ed
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/branch_if.yaml.wasm
@@ -0,0 +1,18 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 027F410141020D0041016A0B0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/call.yaml.wasm b/mlir/test/Target/Wasm/inputs/call.yaml.wasm
new file mode 100644
index 0000000..535a623
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/call.yaml.wasm
@@ -0,0 +1,26 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0 ]
+ - Type: EXPORT
+ Exports:
+ - Name: forty_two
+ Kind: FUNCTION
+ Index: 1
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 412A0B
+ - Index: 1
+ Locals: []
+ Body: 10000B
+...
diff --git a/mlir/test/Target/Wasm/inputs/comparison_ops.yaml.wasm b/mlir/test/Target/Wasm/inputs/comparison_ops.yaml.wasm
new file mode 100644
index 0000000..cde9ee1
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/comparison_ops.yaml.wasm
@@ -0,0 +1,88 @@
+--- !WASM
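+# Each body pushes two constants and applies a single comparison opcode,
+# covering lt/le/gt/ge (signed and unsigned) for i32/i64 and lt/le/gt/ge
+# for f32/f64.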
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 410C4132480B
+ - Index: 1
+ Locals: []
+ Body: 410C41324C0B
+ - Index: 2
+ Locals: []
+ Body: 410C4132490B
+ - Index: 3
+ Locals: []
+ Body: 410C41324D0B
+ - Index: 4
+ Locals: []
+ Body: 410C41324A0B
+ - Index: 5
+ Locals: []
+ Body: 410C41324B0B
+ - Index: 6
+ Locals: []
+ Body: 410C41324E0B
+ - Index: 7
+ Locals: []
+ Body: 410C41324F0B
+ - Index: 8
+ Locals: []
+ Body: 420C4232530B
+ - Index: 9
+ Locals: []
+ Body: 420C4232570B
+ - Index: 10
+ Locals: []
+ Body: 420C4232540B
+ - Index: 11
+ Locals: []
+ Body: 420C4232580B
+ - Index: 12
+ Locals: []
+ Body: 420C4232550B
+ - Index: 13
+ Locals: []
+ Body: 420C4232560B
+ - Index: 14
+ Locals: []
+ Body: 420C4232590B
+ - Index: 15
+ Locals: []
+ Body: 420C42325A0B
+ - Index: 16
+ Locals: []
+ Body: 430000A04043000060415D0B
+ - Index: 17
+ Locals: []
+ Body: 430000A04043000060415F0B
+ - Index: 18
+ Locals: []
+ Body: 430000A04043000060415E0B
+ - Index: 19
+ Locals: []
+ Body: 430000A0404300006041600B
+ - Index: 20
+ Locals: []
+ Body: 440000000000001440440000000000002C40630B
+ - Index: 21
+ Locals: []
+ Body: 440000000000001440440000000000002C40650B
+ - Index: 22
+ Locals: []
+ Body: 440000000000001440440000000000002C40640B
+ - Index: 23
+ Locals: []
+ Body: 440000000000001440440000000000002C40660B
+...
diff --git a/mlir/test/Target/Wasm/inputs/convert.yaml.wasm b/mlir/test/Target/Wasm/inputs/convert.yaml.wasm
new file mode 100644
index 0000000..c346a75
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/convert.yaml.wasm
@@ -0,0 +1,69 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - F32
+ - Index: 1
+ ParamTypes: []
+ ReturnTypes:
+ - F64
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0, 0, 0, 1, 1, 1, 1 ]
+ - Type: EXPORT
+ Exports:
+ - Name: convert_i32_u_to_f32
+ Kind: FUNCTION
+ Index: 0
+ - Name: convert_i32_s_to_f32
+ Kind: FUNCTION
+ Index: 1
+ - Name: convert_i64_u_to_f32
+ Kind: FUNCTION
+ Index: 2
+ - Name: convert_i64s_to_f32
+ Kind: FUNCTION
+ Index: 3
+ - Name: convert_i32_u_to_f64
+ Kind: FUNCTION
+ Index: 4
+ - Name: convert_i32_s_to_f64
+ Kind: FUNCTION
+ Index: 5
+ - Name: convert_i64_u_to_f64
+ Kind: FUNCTION
+ Index: 6
+ - Name: convert_i64s_to_f64
+ Kind: FUNCTION
+ Index: 7
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 410AB30B
+ - Index: 1
+ Locals: []
+ Body: 412AB20B
+ - Index: 2
+ Locals: []
+ Body: 4211B50B
+ - Index: 3
+ Locals: []
+ Body: 420AB40B
+ - Index: 4
+ Locals: []
+ Body: 410AB80B
+ - Index: 5
+ Locals: []
+ Body: 412AB70B
+ - Index: 6
+ Locals: []
+ Body: 4211BA0B
+ - Index: 7
+ Locals: []
+ Body: 420AB90B
+...
diff --git a/mlir/test/Target/Wasm/inputs/demote.yaml.wasm b/mlir/test/Target/Wasm/inputs/demote.yaml.wasm
new file mode 100644
index 0000000..3997045
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/demote.yaml.wasm
@@ -0,0 +1,18 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - F32
+ - Type: FUNCTION
+ FunctionTypes: [ 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 44EC51B81E85EB0140B60B
+...
diff --git a/mlir/test/Target/Wasm/inputs/double_nested_loop.yaml.wasm b/mlir/test/Target/Wasm/inputs/double_nested_loop.yaml.wasm
new file mode 100644
index 0000000..41a2944
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/double_nested_loop.yaml.wasm
@@ -0,0 +1,19 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes: []
+ - Type: FUNCTION
+ FunctionTypes: [ 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals:
+ - Type: I32
+ Count: 2
+ Body: 0340200041016A2100037F41012001410C6A220120004A0D000B410A480D000B0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/empty_blocks_list_and_stack.yaml.wasm b/mlir/test/Target/Wasm/inputs/empty_blocks_list_and_stack.yaml.wasm
new file mode 100644
index 0000000..3171409
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/empty_blocks_list_and_stack.yaml.wasm
@@ -0,0 +1,21 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes:
+ - I32
+ ReturnTypes: []
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 0240024002400B0B0B0B
+ - Index: 1
+ Locals: []
+ Body: 02400B02400B02400B0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/eq.yaml.wasm b/mlir/test/Target/Wasm/inputs/eq.yaml.wasm
new file mode 100644
index 0000000..1998369
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/eq.yaml.wasm
@@ -0,0 +1,27 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0, 0, 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 410C4132460B
+ - Index: 1
+ Locals: []
+ Body: 42144205510B
+ - Index: 2
+ Locals: []
+ Body: 430000A04043000060415B0B
+ - Index: 3
+ Locals: []
+ Body: 440000000000003140440000000000000000610B
+...
diff --git a/mlir/test/Target/Wasm/inputs/eqz.yaml.wasm b/mlir/test/Target/Wasm/inputs/eqz.yaml.wasm
new file mode 100644
index 0000000..894ac50
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/eqz.yaml.wasm
@@ -0,0 +1,29 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0 ]
+ - Type: EXPORT
+ Exports:
+ - Name: eqz_i32
+ Kind: FUNCTION
+ Index: 0
+ - Name: eqz_i64
+ Kind: FUNCTION
+ Index: 1
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 410D450B
+ - Index: 1
+ Locals: []
+ Body: 420D500B
+...
diff --git a/mlir/test/Target/Wasm/inputs/extend.yaml.wasm b/mlir/test/Target/Wasm/inputs/extend.yaml.wasm
new file mode 100644
index 0000000..7e872ba
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/extend.yaml.wasm
@@ -0,0 +1,40 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I64
+ - Index: 1
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0, 1, 1, 0, 0, 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 410AAC0B
+ - Index: 1
+ Locals: []
+ Body: 410AAD0B
+ - Index: 2
+ Locals: []
+ Body: 410AC00B
+ - Index: 3
+ Locals: []
+ Body: 410AC10B
+ - Index: 4
+ Locals: []
+ Body: 420AC20B
+ - Index: 5
+ Locals: []
+ Body: 420AC30B
+ - Index: 6
+ Locals: []
+ Body: 420AC40B
+...
diff --git a/mlir/test/Target/Wasm/inputs/if.yaml.wasm b/mlir/test/Target/Wasm/inputs/if.yaml.wasm
new file mode 100644
index 0000000..ccc38f6
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/if.yaml.wasm
@@ -0,0 +1,25 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes:
+ - I32
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0, 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 2000410171047F200041036C41016A0520004101760B0B
+ - Index: 1
+ Locals: []
+ Body: 20002000410171040041016A0B0B
+ - Index: 2
+ Locals: []
+ Body: 200068047F4102200041017668040041026A0B0541010B0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/loop.yaml.wasm b/mlir/test/Target/Wasm/inputs/loop.yaml.wasm
new file mode 100644
index 0000000..9d33894
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/loop.yaml.wasm
@@ -0,0 +1,17 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes: []
+ - Type: FUNCTION
+ FunctionTypes: [ 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 03400B0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/loop_with_inst.yaml.wasm b/mlir/test/Target/Wasm/inputs/loop_with_inst.yaml.wasm
new file mode 100644
index 0000000..4b8cc54
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/loop_with_inst.yaml.wasm
@@ -0,0 +1,20 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals:
+ - Type: I32
+ Count: 1
+ Body: 037F200041016A21002000410A480B0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/ne.yaml.wasm b/mlir/test/Target/Wasm/inputs/ne.yaml.wasm
new file mode 100644
index 0000000..0167519
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/ne.yaml.wasm
@@ -0,0 +1,27 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 0, 0, 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 410C4132470B
+ - Index: 1
+ Locals: []
+ Body: 42144205520B
+ - Index: 2
+ Locals: []
+ Body: 430000A04043000060415C0B
+ - Index: 3
+ Locals: []
+ Body: 440000000000003140440000000000000000620B
+...
diff --git a/mlir/test/Target/Wasm/inputs/promote.yaml.wasm b/mlir/test/Target/Wasm/inputs/promote.yaml.wasm
new file mode 100644
index 0000000..d38603e
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/promote.yaml.wasm
@@ -0,0 +1,18 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - F64
+ - Type: FUNCTION
+ FunctionTypes: [ 0 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 4300002841BB0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/reinterpret.yaml.wasm b/mlir/test/Target/Wasm/inputs/reinterpret.yaml.wasm
new file mode 100644
index 0000000..c01c1b1
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/reinterpret.yaml.wasm
@@ -0,0 +1,53 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Index: 1
+ ParamTypes: []
+ ReturnTypes:
+ - I64
+ - Index: 2
+ ParamTypes: []
+ ReturnTypes:
+ - F32
+ - Index: 3
+ ParamTypes: []
+ ReturnTypes:
+ - F64
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 1, 2, 3 ]
+ - Type: EXPORT
+ Exports:
+ - Name: i32.reinterpret_f32
+ Kind: FUNCTION
+ Index: 0
+ - Name: i64.reinterpret_f64
+ Kind: FUNCTION
+ Index: 1
+ - Name: f32.reinterpret_i32
+ Kind: FUNCTION
+ Index: 2
+ - Name: f64.reinterpret_i64
+ Kind: FUNCTION
+ Index: 3
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 43000080BFBC0B
+ - Index: 1
+ Locals: []
+ Body: 44000000000000F0BFBD0B
+ - Index: 2
+ Locals: []
+ Body: 417FBE0B
+ - Index: 3
+ Locals: []
+ Body: 427FBF0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/rounding.yaml.wasm b/mlir/test/Target/Wasm/inputs/rounding.yaml.wasm
new file mode 100644
index 0000000..c6e8bf6
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/rounding.yaml.wasm
@@ -0,0 +1,37 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes: []
+ ReturnTypes:
+ - F64
+ - Index: 1
+ ParamTypes: []
+ ReturnTypes:
+ - F32
+ - Type: FUNCTION
+ FunctionTypes: [ 0, 1, 0, 1, 0, 1 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 4433333333333328C09B0B
+ - Index: 1
+ Locals: []
+ Body: 43A01ACF3F8D0B
+ - Index: 2
+ Locals: []
+ Body: 4433333333333328C09C0B
+ - Index: 3
+ Locals: []
+ Body: 43A01ACF3F8E0B
+ - Index: 4
+ Locals: []
+ Body: 4433333333333328C09D0B
+ - Index: 5
+ Locals: []
+ Body: 43A01ACF3F8F0B
+...
diff --git a/mlir/test/Target/Wasm/inputs/wrap.yaml.wasm b/mlir/test/Target/Wasm/inputs/wrap.yaml.wasm
new file mode 100644
index 0000000..51c0b02
--- /dev/null
+++ b/mlir/test/Target/Wasm/inputs/wrap.yaml.wasm
@@ -0,0 +1,24 @@
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes:
+ - I64
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 0 ]
+ - Type: EXPORT
+ Exports:
+ - Name: i64_wrap
+ Kind: FUNCTION
+ Index: 0
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 2000A70B
+...
diff --git a/mlir/test/Target/Wasm/invalid_block_type_index.yaml b/mlir/test/Target/Wasm/invalid_block_type_index.yaml
new file mode 100644
index 0000000..5b83e2e
--- /dev/null
+++ b/mlir/test/Target/Wasm/invalid_block_type_index.yaml
@@ -0,0 +1,28 @@
+
+# RUN: yaml2obj %s | not mlir-translate --import-wasm -o - 2>&1 | FileCheck %s
+
+# CHECK: type index references nonexistent type (2)
+
+--- !WASM
+FileHeader:
+ Version: 0x1
+Sections:
+ - Type: TYPE
+ Signatures:
+ - Index: 0
+ ParamTypes:
+ - I32
+ ReturnTypes:
+ - I32
+ - Index: 1
+ ParamTypes: []
+ ReturnTypes:
+ - I32
+ - Type: FUNCTION
+ FunctionTypes: [ 1 ]
+ - Type: CODE
+ Functions:
+ - Index: 0
+ Locals: []
+ Body: 410E020241016A0B0B
+# -----------------------------^^ Invalid type ID
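+# The body decodes to: i32.const 14; block (type 2); i32.const 1; i32.add;
+# end; end. Only type indices 0 and 1 exist, so index 2 is rejected.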
diff --git a/mlir/test/Target/Wasm/local.mlir b/mlir/test/Target/Wasm/local.mlir
index 32f5900..9844f9c 100644
--- a/mlir/test/Target/Wasm/local.mlir
+++ b/mlir/test/Target/Wasm/local.mlir
@@ -29,7 +29,7 @@
)
*/
-// CHECK-LABEL: wasmssa.func nested @func_0() -> f32 {
+// CHECK-LABEL: wasmssa.func @func_0() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.local of type f32
// CHECK: %[[VAL_1:.*]] = wasmssa.local of type f32
// CHECK: %[[VAL_2:.*]] = wasmssa.const 8.000000e+00 : f32
@@ -40,7 +40,7 @@
// CHECK: %[[VAL_6:.*]] = wasmssa.add %[[VAL_3]] %[[VAL_5]] : f32
// CHECK: wasmssa.return %[[VAL_6]] : f32
-// CHECK-LABEL: wasmssa.func nested @func_1() -> i32 {
+// CHECK-LABEL: wasmssa.func @func_1() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.local of type i32
// CHECK: %[[VAL_1:.*]] = wasmssa.local of type i32
// CHECK: %[[VAL_2:.*]] = wasmssa.const 8 : i32
@@ -51,7 +51,7 @@
// CHECK: %[[VAL_6:.*]] = wasmssa.add %[[VAL_3]] %[[VAL_5]] : i32
// CHECK: wasmssa.return %[[VAL_6]] : i32
-// CHECK-LABEL: wasmssa.func nested @func_2(
+// CHECK-LABEL: wasmssa.func @func_2(
// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i32>) -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 3 : i32
// CHECK: wasmssa.local_set %[[ARG0]] : ref to i32 to %[[VAL_0]] : i32
diff --git a/mlir/test/Target/Wasm/loop.mlir b/mlir/test/Target/Wasm/loop.mlir
new file mode 100644
index 0000000..29ad502
--- /dev/null
+++ b/mlir/test/Target/Wasm/loop.mlir
@@ -0,0 +1,17 @@
+// RUN: yaml2obj %S/inputs/loop.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* IR generated from:
+(module
+ (func
+ (loop $my_loop
+ )
+ )
+)*/
+
+// CHECK-LABEL: wasmssa.func @func_0() {
+// CHECK: wasmssa.loop : {
+// CHECK: wasmssa.block_return
+// CHECK: }> ^bb1
+// CHECK: ^bb1:
+// CHECK: wasmssa.return
+// CHECK: }
diff --git a/mlir/test/Target/Wasm/loop_with_inst.mlir b/mlir/test/Target/Wasm/loop_with_inst.mlir
new file mode 100644
index 0000000..311d007
--- /dev/null
+++ b/mlir/test/Target/Wasm/loop_with_inst.mlir
@@ -0,0 +1,33 @@
+// RUN: yaml2obj %S/inputs/loop_with_inst.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Code used to create this test:
+
+(module
+ (func (result i32)
+ (local $i i32)
+ (loop $my_loop (result i32)
+ local.get $i
+ i32.const 1
+ i32.add
+ local.set $i
+ local.get $i
+ i32.const 10
+ i32.lt_s
+ )
+ )
+)*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.local of type i32
+// CHECK: wasmssa.loop : {
+// CHECK: %[[VAL_1:.*]] = wasmssa.local_get %[[VAL_0]] : ref to i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.const 1 : i32
+// CHECK: %[[VAL_3:.*]] = wasmssa.add %[[VAL_1]] %[[VAL_2]] : i32
+// CHECK: wasmssa.local_set %[[VAL_0]] : ref to i32 to %[[VAL_3]] : i32
+// CHECK: %[[VAL_4:.*]] = wasmssa.local_get %[[VAL_0]] : ref to i32
+// CHECK: %[[VAL_5:.*]] = wasmssa.const 10 : i32
+// CHECK: %[[VAL_6:.*]] = wasmssa.lt_si %[[VAL_4]] %[[VAL_5]] : i32 -> i32
+// CHECK: wasmssa.block_return %[[VAL_6]] : i32
+// CHECK: }> ^bb1
+// CHECK: ^bb1(%[[VAL_7:.*]]: i32):
+// CHECK: wasmssa.return %[[VAL_7]] : i32
diff --git a/mlir/test/Target/Wasm/max.mlir b/mlir/test/Target/Wasm/max.mlir
index 4ef2042..9160bde 100644
--- a/mlir/test/Target/Wasm/max.mlir
+++ b/mlir/test/Target/Wasm/max.mlir
@@ -16,14 +16,14 @@
)
*/
-// CHECK-LABEL: wasmssa.func @min_f32() -> f32 {
+// CHECK-LABEL: wasmssa.func exported @min_f32() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.000000e+00 : f32
// CHECK: %[[VAL_2:.*]] = wasmssa.max %[[VAL_0]] %[[VAL_1]] : f32
// CHECK: wasmssa.return %[[VAL_2]] : f32
-// CHECK-LABEL: wasmssa.func @min_f64() -> f64 {
+// CHECK-LABEL: wasmssa.func exported @min_f64() -> f64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.000000e+00 : f64
// CHECK: %[[VAL_2:.*]] = wasmssa.max %[[VAL_0]] %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/memory_min_eq_max.mlir b/mlir/test/Target/Wasm/memory_min_eq_max.mlir
index 2ba5ab5..ea8f719 100644
--- a/mlir/test/Target/Wasm/memory_min_eq_max.mlir
+++ b/mlir/test/Target/Wasm/memory_min_eq_max.mlir
@@ -4,4 +4,4 @@
(module (memory 0 0))
*/
-// CHECK-LABEL: wasmssa.memory @mem_0 nested !wasmssa<limit[0: 0]>
+// CHECK-LABEL: wasmssa.memory @mem_0 !wasmssa<limit[0: 0]>
diff --git a/mlir/test/Target/Wasm/memory_min_max.mlir b/mlir/test/Target/Wasm/memory_min_max.mlir
index ebf6418..88782ec 100644
--- a/mlir/test/Target/Wasm/memory_min_max.mlir
+++ b/mlir/test/Target/Wasm/memory_min_max.mlir
@@ -4,4 +4,4 @@
(module (memory 0 65536))
*/
-// CHECK-LABEL: wasmssa.memory @mem_0 nested !wasmssa<limit[0: 65536]>
+// CHECK-LABEL: wasmssa.memory @mem_0 !wasmssa<limit[0: 65536]>
diff --git a/mlir/test/Target/Wasm/memory_min_no_max.mlir b/mlir/test/Target/Wasm/memory_min_no_max.mlir
index 8d88786..c10c5cc 100644
--- a/mlir/test/Target/Wasm/memory_min_no_max.mlir
+++ b/mlir/test/Target/Wasm/memory_min_no_max.mlir
@@ -4,4 +4,4 @@
(module (memory 1))
*/
-// CHECK-LABEL: wasmssa.memory @mem_0 nested !wasmssa<limit[1:]>
+// CHECK-LABEL: wasmssa.memory @mem_0 !wasmssa<limit[1:]>
diff --git a/mlir/test/Target/Wasm/min.mlir b/mlir/test/Target/Wasm/min.mlir
index 1058c7d..2372bcc 100644
--- a/mlir/test/Target/Wasm/min.mlir
+++ b/mlir/test/Target/Wasm/min.mlir
@@ -16,13 +16,13 @@
)
*/
-// CHECK-LABEL: wasmssa.func @min_f32() -> f32 {
+// CHECK-LABEL: wasmssa.func exported @min_f32() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.000000e+00 : f32
// CHECK: %[[VAL_2:.*]] = wasmssa.min %[[VAL_0]] %[[VAL_1]] : f32
// CHECK: wasmssa.return %[[VAL_2]] : f32
-// CHECK-LABEL: wasmssa.func @min_f64() -> f64 {
+// CHECK-LABEL: wasmssa.func exported @min_f64() -> f64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.000000e+00 : f64
// CHECK: %[[VAL_2:.*]] = wasmssa.min %[[VAL_0]] %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/ne.mlir b/mlir/test/Target/Wasm/ne.mlir
new file mode 100644
index 0000000..331df75
--- /dev/null
+++ b/mlir/test/Target/Wasm/ne.mlir
@@ -0,0 +1,52 @@
+// RUN: yaml2obj %S/inputs/ne.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+/* Source code used to create this test:
+(module
+ (func $ne_i32 (result i32)
+ i32.const 12
+ i32.const 50
+ i32.ne
+ )
+
+ (func $ne_i64 (result i32)
+ i64.const 20
+ i64.const 5
+ i64.ne
+ )
+
+ (func $ne_f32 (result i32)
+ f32.const 5
+ f32.const 14
+ f32.ne
+ )
+
+ (func $ne_f64 (result i32)
+ f64.const 17
+ f64.const 0
+ f64.ne
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
+// CHECK: %[[VAL_2:.*]] = wasmssa.ne %[[VAL_0]] %[[VAL_1]] : i32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_1() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 20 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 5 : i64
+// CHECK: %[[VAL_2:.*]] = wasmssa.ne %[[VAL_0]] %[[VAL_1]] : i64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_2() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f32
+// CHECK: %[[VAL_2:.*]] = wasmssa.ne %[[VAL_0]] %[[VAL_1]] : f32 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
+
+// CHECK-LABEL: wasmssa.func @func_3() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.700000e+01 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.const 0.000000e+00 : f64
+// CHECK: %[[VAL_2:.*]] = wasmssa.ne %[[VAL_0]] %[[VAL_1]] : f64 -> i32
+// CHECK: wasmssa.return %[[VAL_2]] : i32
diff --git a/mlir/test/Target/Wasm/neg.mlir b/mlir/test/Target/Wasm/neg.mlir
index 5811ab50..dae8ee5 100644
--- a/mlir/test/Target/Wasm/neg.mlir
+++ b/mlir/test/Target/Wasm/neg.mlir
@@ -12,12 +12,12 @@
)
*/
-// CHECK-LABEL: wasmssa.func @neg_f32() -> f32 {
+// CHECK-LABEL: wasmssa.func exported @neg_f32() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f32
// CHECK: %[[VAL_1:.*]] = wasmssa.neg %[[VAL_0]] : f32
// CHECK: wasmssa.return %[[VAL_1]] : f32
-// CHECK-LABEL: wasmssa.func @neg_f64() -> f64 {
+// CHECK-LABEL: wasmssa.func exported @neg_f64() -> f64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f64
// CHECK: %[[VAL_1:.*]] = wasmssa.neg %[[VAL_0]] : f64
// CHECK: wasmssa.return %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/or.mlir b/mlir/test/Target/Wasm/or.mlir
index 521f2ba..be0b3d7 100644
--- a/mlir/test/Target/Wasm/or.mlir
+++ b/mlir/test/Target/Wasm/or.mlir
@@ -14,13 +14,13 @@
)
*/
-// CHECK-LABEL: wasmssa.func @or_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @or_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.or %0 %1 : i32
// CHECK: wasmssa.return %2 : i32
-// CHECK-LABEL: wasmssa.func @or_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @or_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.or %0 %1 : i64
diff --git a/mlir/test/Target/Wasm/popcnt.mlir b/mlir/test/Target/Wasm/popcnt.mlir
index 235333a..bfaa8eb 100644
--- a/mlir/test/Target/Wasm/popcnt.mlir
+++ b/mlir/test/Target/Wasm/popcnt.mlir
@@ -14,12 +14,12 @@
)
*/
-// CHECK-LABEL: wasmssa.func @popcnt_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @popcnt_i32() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i32
// CHECK: %[[VAL_1:.*]] = wasmssa.popcnt %[[VAL_0]] : i32
// CHECK: wasmssa.return %[[VAL_1]] : i32
-// CHECK-LABEL: wasmssa.func @popcnt_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @popcnt_i64() -> i64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 10 : i64
// CHECK: %[[VAL_1:.*]] = wasmssa.popcnt %[[VAL_0]] : i64
// CHECK: wasmssa.return %[[VAL_1]] : i64
diff --git a/mlir/test/Target/Wasm/promote.mlir b/mlir/test/Target/Wasm/promote.mlir
new file mode 100644
index 0000000..44c31b6
--- /dev/null
+++ b/mlir/test/Target/Wasm/promote.mlir
@@ -0,0 +1,14 @@
+// RUN: yaml2obj %S/inputs/promote.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/* Source code used to generate this test:
+(module
+ (func $main (result f64)
+ f32.const 10.5
+ f64.promote_f32
+ )
+)*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> f64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.050000e+01 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.promote %[[VAL_0]] : f32 to f64
+// CHECK: wasmssa.return %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/reinterpret.mlir b/mlir/test/Target/Wasm/reinterpret.mlir
new file mode 100644
index 0000000..574d13f
--- /dev/null
+++ b/mlir/test/Target/Wasm/reinterpret.mlir
@@ -0,0 +1,46 @@
+// RUN: yaml2obj %S/inputs/reinterpret.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+
+/*
+Test generated from:
+(module
+ (func (export "i32.reinterpret_f32") (result i32)
+ f32.const -1
+ i32.reinterpret_f32
+ )
+
+ (func (export "i64.reinterpret_f64") (result i64)
+ f64.const -1
+ i64.reinterpret_f64
+ )
+
+ (func (export "f32.reinterpret_i32") (result f32)
+ i32.const -1
+ f32.reinterpret_i32
+ )
+
+ (func (export "f64.reinterpret_i64") (result f64)
+ i64.const -1
+ f64.reinterpret_i64
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func exported @i32.reinterpret_f32() -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const -1.000000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.reinterpret %[[VAL_0]] : f32 as i32
+// CHECK: wasmssa.return %[[VAL_1]] : i32
+
+// CHECK-LABEL: wasmssa.func exported @i64.reinterpret_f64() -> i64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const -1.000000e+00 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.reinterpret %[[VAL_0]] : f64 as i64
+// CHECK: wasmssa.return %[[VAL_1]] : i64
+
+// CHECK-LABEL: wasmssa.func exported @f32.reinterpret_i32() -> f32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const -1 : i32
+// CHECK: %[[VAL_1:.*]] = wasmssa.reinterpret %[[VAL_0]] : i32 as f32
+// CHECK: wasmssa.return %[[VAL_1]] : f32
+
+// CHECK-LABEL: wasmssa.func exported @f64.reinterpret_i64() -> f64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const -1 : i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.reinterpret %[[VAL_0]] : i64 as f64
+// CHECK: wasmssa.return %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/rem.mlir b/mlir/test/Target/Wasm/rem.mlir
index b19b8d9..16c9c78 100644
--- a/mlir/test/Target/Wasm/rem.mlir
+++ b/mlir/test/Target/Wasm/rem.mlir
@@ -24,28 +24,28 @@
)
*/
-// CHECK-LABEL: wasmssa.func @rem_u_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @rem_u_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.rem_ui %0 %1 : i32
// CHECK: wasmssa.return %2 : i32
// CHECK: }
-// CHECK-LABEL: wasmssa.func @rem_u_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @rem_u_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.rem_ui %0 %1 : i64
// CHECK: wasmssa.return %2 : i64
// CHECK: }
-// CHECK-LABEL: wasmssa.func @rem_s_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @rem_s_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.rem_si %0 %1 : i32
// CHECK: wasmssa.return %2 : i32
// CHECK: }
-// CHECK-LABEL: wasmssa.func @rem_s_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @rem_s_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.rem_si %0 %1 : i64
diff --git a/mlir/test/Target/Wasm/rotl.mlir b/mlir/test/Target/Wasm/rotl.mlir
index ec573554..4c2e5af 100644
--- a/mlir/test/Target/Wasm/rotl.mlir
+++ b/mlir/test/Target/Wasm/rotl.mlir
@@ -14,13 +14,13 @@
)
*/
-// CHECK-LABEL: wasmssa.func @rotl_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @rotl_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.rotl %0 by %1 bits : i32
// CHECK: wasmssa.return %2 : i32
-// CHECK-LABEL: wasmssa.func @rotl_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @rotl_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.rotl %0 by %1 bits : i64
diff --git a/mlir/test/Target/Wasm/rotr.mlir b/mlir/test/Target/Wasm/rotr.mlir
index 5618b43..ec403d0 100644
--- a/mlir/test/Target/Wasm/rotr.mlir
+++ b/mlir/test/Target/Wasm/rotr.mlir
@@ -14,13 +14,13 @@
)
*/
-// CHECK-LABEL: wasmssa.func @rotr_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @rotr_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.rotr %0 by %1 bits : i32
// CHECK: wasmssa.return %2 : i32
-// CHECK-LABEL: wasmssa.func @rotr_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @rotr_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.rotr %0 by %1 bits : i64
diff --git a/mlir/test/Target/Wasm/rounding.mlir b/mlir/test/Target/Wasm/rounding.mlir
new file mode 100644
index 0000000..947637e
--- /dev/null
+++ b/mlir/test/Target/Wasm/rounding.mlir
@@ -0,0 +1,50 @@
+// RUN: yaml2obj %S/inputs/rounding.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+/* Source code used to create this test:
+(module
+ (func $ceil_f64 (result f64)
+ f64.const -12.1
+ f64.ceil
+ )
+ (func $ceil_f32 (result f32)
+ f32.const 1.618
+ f32.ceil
+ )
+ (func $floor_f64 (result f64)
+ f64.const -12.1
+ f64.floor
+ )
+ (func $floor_f32 (result f32)
+ f32.const 1.618
+ f32.floor
+ )
+  (func $trunc_f64 (result f64)
+    f64.const -12.1
+    f64.trunc
+  )
+  (func $trunc_f32 (result f32)
+    f32.const 1.618
+    f32.trunc
+  )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func @func_0() -> f64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const -1.210000e+01 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.ceil %[[VAL_0]] : f64
+// CHECK: wasmssa.return %[[VAL_1]] : f64
+
+// CHECK-LABEL: wasmssa.func @func_1() -> f32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.618000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.ceil %[[VAL_0]] : f32
+// CHECK: wasmssa.return %[[VAL_1]] : f32
+
+// CHECK-LABEL: wasmssa.func @func_2() -> f64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const -1.210000e+01 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.floor %[[VAL_0]] : f64
+// CHECK: wasmssa.return %[[VAL_1]] : f64
+
+// CHECK-LABEL: wasmssa.func @func_3() -> f32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.618000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.floor %[[VAL_0]] : f32
+// CHECK: wasmssa.return %[[VAL_1]] : f32
+
+// CHECK-LABEL: wasmssa.func @func_4() -> f64 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const -1.210000e+01 : f64
+// CHECK: %[[VAL_1:.*]] = wasmssa.trunc %[[VAL_0]] : f64
+// CHECK: wasmssa.return %[[VAL_1]] : f64
+
+// CHECK-LABEL: wasmssa.func @func_5() -> f32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.618000e+00 : f32
+// CHECK: %[[VAL_1:.*]] = wasmssa.trunc %[[VAL_0]] : f32
+// CHECK: wasmssa.return %[[VAL_1]] : f32
diff --git a/mlir/test/Target/Wasm/shl.mlir b/mlir/test/Target/Wasm/shl.mlir
index f2bdd57..1363112 100644
--- a/mlir/test/Target/Wasm/shl.mlir
+++ b/mlir/test/Target/Wasm/shl.mlir
@@ -14,13 +14,13 @@
)
*/
-// CHECK-LABEL: wasmssa.func @shl_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @shl_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.shl %0 by %1 bits : i32
// CHECK: wasmssa.return %2 : i32
-// CHECK-LABEL: wasmssa.func @shl_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @shl_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.shl %0 by %1 bits : i64
diff --git a/mlir/test/Target/Wasm/shr_s.mlir b/mlir/test/Target/Wasm/shr_s.mlir
index 247d9be..da1a38f 100644
--- a/mlir/test/Target/Wasm/shr_s.mlir
+++ b/mlir/test/Target/Wasm/shr_s.mlir
@@ -14,13 +14,13 @@
)
*/
-// CHECK-LABEL: wasmssa.func @shr_s_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @shr_s_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.shr_s %0 by %1 bits : i32
// CHECK: wasmssa.return %2 : i32
-// CHECK-LABEL: wasmssa.func @shr_s_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @shr_s_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.shr_s %0 by %1 bits : i64
diff --git a/mlir/test/Target/Wasm/shr_u.mlir b/mlir/test/Target/Wasm/shr_u.mlir
index 9a79eed..2991c2a 100644
--- a/mlir/test/Target/Wasm/shr_u.mlir
+++ b/mlir/test/Target/Wasm/shr_u.mlir
@@ -14,13 +14,13 @@
)
*/
-// CHECK-LABEL: wasmssa.func @shr_u_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @shr_u_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.shr_u %0 by %1 bits : i32
// CHECK: wasmssa.return %2 : i32
-// CHECK-LABEL: wasmssa.func @shr_u_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @shr_u_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.shr_u %0 by %1 bits : i64
diff --git a/mlir/test/Target/Wasm/sqrt.mlir b/mlir/test/Target/Wasm/sqrt.mlir
index 77444ad..6b968d6 100644
--- a/mlir/test/Target/Wasm/sqrt.mlir
+++ b/mlir/test/Target/Wasm/sqrt.mlir
@@ -12,12 +12,12 @@
)
*/
-// CHECK-LABEL: wasmssa.func @sqrt_f32() -> f32 {
+// CHECK-LABEL: wasmssa.func exported @sqrt_f32() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f32
// CHECK: %[[VAL_1:.*]] = wasmssa.sqrt %[[VAL_0]] : f32
// CHECK: wasmssa.return %[[VAL_1]] : f32
-// CHECK-LABEL: wasmssa.func @sqrt_f64() -> f64 {
+// CHECK-LABEL: wasmssa.func exported @sqrt_f64() -> f64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.000000e+01 : f64
// CHECK: %[[VAL_1:.*]] = wasmssa.sqrt %[[VAL_0]] : f64
// CHECK: wasmssa.return %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/sub.mlir b/mlir/test/Target/Wasm/sub.mlir
index b9c6caf..5b242f4 100644
--- a/mlir/test/Target/Wasm/sub.mlir
+++ b/mlir/test/Target/Wasm/sub.mlir
@@ -27,25 +27,25 @@
)
*/
-// CHECK-LABEL: wasmssa.func nested @func_0() -> i32 {
+// CHECK-LABEL: wasmssa.func @func_0() -> i32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 12 : i32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 50 : i32
// CHECK: %[[VAL_2:.*]] = wasmssa.sub %[[VAL_0]] %[[VAL_1]] : i32
// CHECK: wasmssa.return %[[VAL_2]] : i32
-// CHECK-LABEL: wasmssa.func nested @func_1() -> i64 {
+// CHECK-LABEL: wasmssa.func @func_1() -> i64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 20 : i64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 5 : i64
// CHECK: %[[VAL_2:.*]] = wasmssa.sub %[[VAL_0]] %[[VAL_1]] : i64
// CHECK: wasmssa.return %[[VAL_2]] : i64
-// CHECK-LABEL: wasmssa.func nested @func_2() -> f32 {
+// CHECK-LABEL: wasmssa.func @func_2() -> f32 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 5.000000e+00 : f32
// CHECK: %[[VAL_1:.*]] = wasmssa.const 1.400000e+01 : f32
// CHECK: %[[VAL_2:.*]] = wasmssa.sub %[[VAL_0]] %[[VAL_1]] : f32
// CHECK: wasmssa.return %[[VAL_2]] : f32
-// CHECK-LABEL: wasmssa.func nested @func_3() -> f64 {
+// CHECK-LABEL: wasmssa.func @func_3() -> f64 {
// CHECK: %[[VAL_0:.*]] = wasmssa.const 1.700000e+01 : f64
// CHECK: %[[VAL_1:.*]] = wasmssa.const 0.000000e+00 : f64
// CHECK: %[[VAL_2:.*]] = wasmssa.sub %[[VAL_0]] %[[VAL_1]] : f64
diff --git a/mlir/test/Target/Wasm/wrap.mlir b/mlir/test/Target/Wasm/wrap.mlir
new file mode 100644
index 0000000..1266758
--- /dev/null
+++ b/mlir/test/Target/Wasm/wrap.mlir
@@ -0,0 +1,15 @@
+// RUN: yaml2obj %S/inputs/wrap.yaml.wasm -o - | mlir-translate --import-wasm | FileCheck %s
+/* Source code used to create this test:
+(module
+ (func (export "i64_wrap") (param $in i64) (result i32)
+ local.get $in
+ i32.wrap_i64
+ )
+)
+*/
+
+// CHECK-LABEL: wasmssa.func exported @i64_wrap(
+// CHECK-SAME: %[[ARG0:.*]]: !wasmssa<local ref to i64>) -> i32 {
+// CHECK: %[[VAL_0:.*]] = wasmssa.local_get %[[ARG0]] : ref to i64
+// CHECK: %[[VAL_1:.*]] = wasmssa.wrap %[[VAL_0]] : i64 to i32
+// CHECK: wasmssa.return %[[VAL_1]] : i32
diff --git a/mlir/test/Target/Wasm/xor.mlir b/mlir/test/Target/Wasm/xor.mlir
index 94691de..56407db 100644
--- a/mlir/test/Target/Wasm/xor.mlir
+++ b/mlir/test/Target/Wasm/xor.mlir
@@ -14,13 +14,13 @@
)
*/
-// CHECK-LABEL: wasmssa.func @xor_i32() -> i32 {
+// CHECK-LABEL: wasmssa.func exported @xor_i32() -> i32 {
// CHECK: %0 = wasmssa.const 10 : i32
// CHECK: %1 = wasmssa.const 3 : i32
// CHECK: %2 = wasmssa.xor %0 %1 : i32
// CHECK: wasmssa.return %2 : i32
-// CHECK-LABEL: wasmssa.func @xor_i64() -> i64 {
+// CHECK-LABEL: wasmssa.func exported @xor_i64() -> i64 {
// CHECK: %0 = wasmssa.const 10 : i64
// CHECK: %1 = wasmssa.const 3 : i64
// CHECK: %2 = wasmssa.xor %0 %1 : i64
diff --git a/mlir/test/lib/Analysis/CMakeLists.txt b/mlir/test/lib/Analysis/CMakeLists.txt
index 9187998..c37671a 100644
--- a/mlir/test/lib/Analysis/CMakeLists.txt
+++ b/mlir/test/lib/Analysis/CMakeLists.txt
@@ -17,6 +17,7 @@ add_mlir_library(MLIRTestAnalysis
DataFlow/TestDenseForwardDataFlowAnalysis.cpp
DataFlow/TestLivenessAnalysis.cpp
DataFlow/TestSparseBackwardDataFlowAnalysis.cpp
+ DataFlow/TestStridedMetadataRangeAnalysis.cpp
EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp
new file mode 100644
index 0000000..6ac09fd
--- /dev/null
+++ b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp
@@ -0,0 +1,86 @@
+//===- TestStridedMetadataRangeAnalysis.cpp - Test strided md analysis ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
+#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
+#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
+#include "mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h"
+#include "mlir/Analysis/DataFlowFramework.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace mlir;
+using namespace mlir::dataflow;
+
+static void printAnalysisResults(DataFlowSolver &solver, Operation *op,
+ raw_ostream &os) {
+ // Collect the strided metadata of the op results.
+ SmallVector<std::pair<unsigned, const StridedMetadataRangeLattice *>> results;
+ for (OpResult result : op->getResults()) {
+ const auto *state = solver.lookupState<StridedMetadataRangeLattice>(result);
+ // Skip the result if it's uninitialized.
+ if (!state || state->getValue().isUninitialized())
+ continue;
+
+ // Skip the result if the range is empty.
+ const mlir::StridedMetadataRange &md = state->getValue();
+ if (md.getOffsets().empty() && md.getSizes().empty() &&
+ md.getStrides().empty())
+ continue;
+ results.push_back({result.getResultNumber(), state});
+ }
+
+ // Early exit if there's no metadata to print.
+ if (results.empty())
+ return;
+
+ // Print the metadata.
+ os << "Op: " << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "\n";
+ for (auto [idx, state] : results)
+ os << " result[" << idx << "]: " << state->getValue() << "\n";
+ os << "\n";
+}
+
+namespace {
+struct TestStridedMetadataRangeAnalysisPass
+ : public PassWrapper<TestStridedMetadataRangeAnalysisPass,
+ OperationPass<>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
+ TestStridedMetadataRangeAnalysisPass)
+
+ StringRef getArgument() const override {
+ return "test-strided-metadata-range-analysis";
+ }
+ void runOnOperation() override {
+ Operation *op = getOperation();
+
+ DataFlowSolver solver;
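+    // Dead-code and sparse constant-propagation analyses are loaded as
+    // prerequisites: the dataflow framework relies on them for block
+    // reachability and constant values before the range analyses can
+    // produce results.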
+ solver.load<DeadCodeAnalysis>();
+ solver.load<SparseConstantPropagation>();
+ solver.load<IntegerRangeAnalysis>();
+ solver.load<StridedMetadataRangeAnalysis>();
+ if (failed(solver.initializeAndRun(op)))
+ return signalPassFailure();
+
+ op->walk(
+ [&](Operation *op) { printAnalysisResults(solver, op, llvm::errs()); });
+ }
+};
+} // end anonymous namespace
+
+namespace mlir {
+namespace test {
+void registerTestStridedMetadataRangeAnalysisPass() {
+ PassRegistration<TestStridedMetadataRangeAnalysisPass>();
+}
+} // end namespace test
+} // end namespace mlir
diff --git a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
index 1e2d4a7..4069a74 100644
--- a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
+++ b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
@@ -11,11 +11,25 @@
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/Transforms.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
+#include "TestAttributes.h" // TestTensorEncodingAttr, TestMemRefLayoutAttr
+#include "TestDialect.h"
+
using namespace mlir;
namespace {
+MemRefLayoutAttrInterface
+getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) {
+ if (auto encoding = dyn_cast_if_present<test::TestTensorEncodingAttr>(
+ tensorType.getEncoding())) {
+ return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get(
+ tensorType.getContext(), encoding.getDummy()));
+ }
+ return {};
+}
+
struct TestOneShotModuleBufferizePass
: public PassWrapper<TestOneShotModuleBufferizePass, OperationPass<>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestOneShotModuleBufferizePass)
@@ -25,6 +39,7 @@ struct TestOneShotModuleBufferizePass
: PassWrapper(pass) {}
void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert<test::TestDialect>();
registry.insert<bufferization::BufferizationDialect>();
}
StringRef getArgument() const final {
@@ -41,6 +56,17 @@ struct TestOneShotModuleBufferizePass
bufferization::OneShotBufferizationOptions opt;
opt.bufferizeFunctionBoundaries = true;
+ opt.functionArgTypeConverterFn =
+ [&](bufferization::TensorLikeType tensor, Attribute memSpace,
+ func::FuncOp, const bufferization::BufferizationOptions &) {
+ assert(isa<RankedTensorType>(tensor) && "tests only builtin tensors");
+ auto tensorType = cast<RankedTensorType>(tensor);
+ auto layout = getMemRefLayoutForTensorEncoding(tensorType);
+ return cast<bufferization::BufferLikeType>(
+ MemRefType::get(tensorType.getShape(),
+ tensorType.getElementType(), layout, memSpace));
+ };
+
bufferization::BufferizationState bufferizationState;
if (failed(bufferization::runOneShotModuleBufferize(getOperation(), opt,
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
index 727c84c..8c5c8e8 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -276,10 +276,8 @@ void TestLinalgTransforms::runOnOperation() {
Operation *consumer = opOperand->getOwner();
// If we have a pack/unpack consumer and a producer that has multiple
// uses, do not apply the folding patterns.
- if (isa<linalg::PackOp, linalg::UnPackOp>(consumer) &&
- isa<TilingInterface>(producer) && !producer->hasOneUse())
- return false;
- return true;
+ return !(isa<linalg::PackOp, linalg::UnPackOp>(consumer) &&
+ isa<TilingInterface>(producer) && !producer->hasOneUse());
};
applyFoldIntoPackAndUnpackPatterns(rootOp, controlFn);
}
diff --git a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
index f84055d..1e59338 100644
--- a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
@@ -1,6 +1,7 @@
add_mlir_library(MLIROpenACCTestPasses
TestOpenACC.cpp
TestPointerLikeTypeInterface.cpp
+ TestRecipePopulate.cpp
EXCLUDE_FROM_LIBMLIR
)
diff --git a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
index 9886240..bea21b9 100644
--- a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
+++ b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
@@ -15,9 +15,13 @@ namespace test {
// Forward declarations of individual test pass registration functions
void registerTestPointerLikeTypeInterfacePass();
+void registerTestRecipePopulatePass();
// Unified registration function for all OpenACC tests
-void registerTestOpenACC() { registerTestPointerLikeTypeInterfacePass(); }
+void registerTestOpenACC() {
+ registerTestPointerLikeTypeInterfacePass();
+ registerTestRecipePopulatePass();
+}
} // namespace test
} // namespace mlir
diff --git a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
index 85f9283..027b0a1 100644
--- a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
+++ b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
@@ -196,13 +196,15 @@ void TestPointerLikeTypeInterfacePass::testGenAllocate(
newBuilder.setInsertionPointAfter(op);
// Call the genAllocate API
+ bool needsFree = false;
Value allocRes = pointerType.genAllocate(newBuilder, loc, "test_alloc",
- result.getType(), result);
+ result.getType(), result, needsFree);
if (allocRes) {
llvm::errs() << "Successfully generated alloc for operation: ";
op->print(llvm::errs());
llvm::errs() << "\n";
+ llvm::errs() << "\tneeds free: " << (needsFree ? "true" : "false") << "\n";
// Print all operations that were inserted
for (Operation *insertedOp : tracker.insertedOps) {
@@ -230,8 +232,8 @@ void TestPointerLikeTypeInterfacePass::testGenFree(Operation *op, Value result,
// Call the genFree API
auto typedResult = cast<TypedValue<PointerLikeType>>(result);
- bool success =
- pointerType.genFree(newBuilder, loc, typedResult, result.getType());
+ bool success = pointerType.genFree(newBuilder, loc, typedResult, result,
+ result.getType());
if (success) {
llvm::errs() << "Successfully generated free for operation: ";
diff --git a/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp
new file mode 100644
index 0000000..35f092c
--- /dev/null
+++ b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp
@@ -0,0 +1,110 @@
+//===- TestRecipePopulate.cpp - Test Recipe Population -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a test pass for exercising the createAndPopulate
+// methods of the OpenACC recipe operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace mlir;
+using namespace mlir::acc;
+
+namespace {
+
+struct TestRecipePopulatePass
+ : public PassWrapper<TestRecipePopulatePass, OperationPass<ModuleOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestRecipePopulatePass)
+
+ TestRecipePopulatePass() = default;
+ TestRecipePopulatePass(const TestRecipePopulatePass &pass)
+ : PassWrapper(pass) {
+ recipeType = pass.recipeType;
+ }
+
+ Pass::Option<std::string> recipeType{
+ *this, "recipe-type",
+ llvm::cl::desc("Recipe type: private or firstprivate"),
+ llvm::cl::init("private")};
+
+ StringRef getArgument() const override { return "test-acc-recipe-populate"; }
+
+ StringRef getDescription() const override {
+ return "Test OpenACC recipe population";
+ }
+
+ void runOnOperation() override;
+
+ void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert<acc::OpenACCDialect>();
+ registry.insert<arith::ArithDialect>();
+ registry.insert<memref::MemRefDialect>();
+ }
+};
+
+void TestRecipePopulatePass::runOnOperation() {
+ auto module = getOperation();
+ OpBuilder builder(&getContext());
+
+  // Collect all test variables.
+ SmallVector<std::tuple<Operation *, Value, std::string>> testVars;
+
+ module.walk([&](Operation *op) {
+ if (auto varName = op->getAttrOfType<StringAttr>("test.var")) {
+ for (auto result : op->getResults()) {
+ testVars.push_back({op, result, varName.str()});
+ }
+ }
+ });
+
+  // Generate recipes at module level.
+ builder.setInsertionPoint(&module.getBodyRegion().front(),
+ module.getBodyRegion().front().begin());
+
+ for (auto [op, var, varName] : testVars) {
+ Location loc = op->getLoc();
+
+ std::string recipeName = recipeType.getValue() + "_" + varName;
+ ValueRange bounds; // No bounds for memref tests
+
+ if (recipeType == "private") {
+ auto recipe = PrivateRecipeOp::createAndPopulate(
+ builder, loc, recipeName, var.getType(), varName, bounds);
+
+ if (!recipe) {
+ op->emitError("Failed to create private recipe for ") << varName;
+ }
+ } else if (recipeType == "firstprivate") {
+ auto recipe = FirstprivateRecipeOp::createAndPopulate(
+ builder, loc, recipeName, var.getType(), varName, bounds);
+
+ if (!recipe) {
+ op->emitError("Failed to create firstprivate recipe for ") << varName;
+ }
+ }
+ }
+}
+
+} // namespace
+
+namespace mlir {
+namespace test {
+
+void registerTestRecipePopulatePass() {
+ PassRegistration<TestRecipePopulatePass>();
+}
+
+} // namespace test
+} // namespace mlir
diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td
index 5685004..9e7e4f8 100644
--- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td
+++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td
@@ -22,6 +22,7 @@ include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/BuiltinAttributeInterfaces.td"
include "mlir/IR/EnumAttr.td"
include "mlir/IR/OpAsmInterface.td"
+include "mlir/IR/TensorEncoding.td"
// All of the attributes will extend this class.
class Test_Attr<string name, list<Trait> traits = []>
@@ -439,4 +440,20 @@ def TestCustomStorageCtorAttr : Test_Attr<"TestCustomStorageCtorAttr"> {
let hasStorageCustomConstructor = 1;
}
+def TestTensorEncodingAttr : Test_Attr<"TestTensorEncoding",
+ [DeclareAttrInterfaceMethods<VerifiableTensorEncoding>]> {
+ let mnemonic = "tensor_encoding";
+
+ let parameters = (ins "mlir::StringAttr":$dummy);
+ let assemblyFormat = "`<` $dummy `>`";
+}
+
+def TestMemRefLayoutAttr : Test_Attr<"TestMemRefLayout",
+ [DeclareAttrInterfaceMethods<MemRefLayoutAttrInterface>]> {
+ let mnemonic = "memref_layout";
+
+ let parameters = (ins "mlir::StringAttr":$dummy);
+ let assemblyFormat = "`<` $dummy `>`";
+}
+
#endif // TEST_ATTRDEFS
diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp
index fe1e916..9db7b01 100644
--- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp
+++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp
@@ -542,6 +542,24 @@ test::detail::TestCustomStorageCtorAttrAttrStorage::construct(
}
//===----------------------------------------------------------------------===//
+// TestTensorEncodingAttr
+//===----------------------------------------------------------------------===//
+
+::llvm::LogicalResult TestTensorEncodingAttr::verifyEncoding(
+ mlir::ArrayRef<int64_t> shape, mlir::Type elementType,
+ llvm::function_ref<::mlir::InFlightDiagnostic()> emitError) const {
+ return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// TestMemRefLayoutAttr
+//===----------------------------------------------------------------------===//
+
+mlir::AffineMap TestMemRefLayoutAttr::getAffineMap() const {
+ return mlir::AffineMap::getMultiDimIdentityMap(1, getContext());
+}
+
+//===----------------------------------------------------------------------===//
// TestDialect
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.h b/mlir/test/lib/Dialect/Test/TestAttributes.h
index 778d84fa..0ad5ab6 100644
--- a/mlir/test/lib/Dialect/Test/TestAttributes.h
+++ b/mlir/test/lib/Dialect/Test/TestAttributes.h
@@ -24,6 +24,7 @@
#include "mlir/IR/Dialect.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/DialectResourceBlobManager.h"
+#include "mlir/IR/TensorEncoding.h"
// generated files require above includes to come first
#include "TestAttrInterfaces.h.inc"
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.h b/mlir/test/lib/Dialect/Test/TestDialect.h
index f2adca6..bcf3b55d 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.h
+++ b/mlir/test/lib/Dialect/Test/TestDialect.h
@@ -18,6 +18,7 @@
#include "TestInterfaces.h"
#include "TestTypes.h"
#include "mlir/Bytecode/BytecodeImplementation.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/DLTI/Traits.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.td b/mlir/test/lib/Dialect/Test/TestDialect.td
index 2b5491f..37a263f 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.td
+++ b/mlir/test/lib/Dialect/Test/TestDialect.td
@@ -24,7 +24,10 @@ def Test_Dialect : Dialect {
let useDefaultTypePrinterParser = 0;
let useDefaultAttributePrinterParser = 1;
let isExtensible = 1;
- let dependentDialects = ["::mlir::DLTIDialect"];
+ let dependentDialects = [
+ "::mlir::DLTIDialect",
+ "::mlir::bufferization::BufferizationDialect"
+ ];
let discardableAttrs = (ins
"mlir::IntegerAttr":$discardable_attr_key,
"SimpleAAttr":$other_discardable_attr_key
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index 53055fe..b211e24 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1425,6 +1425,39 @@ TestMultiSlotAlloca::handleDestructuringComplete(
return createNewMultiAllocaWithoutSlot(slot, builder, *this);
}
+namespace {
+/// Returns the test dialect's memref layout attribute for the test dialect's
+/// tensor encoding, when applicable.
+MemRefLayoutAttrInterface
+getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) {
+ if (auto encoding =
+ dyn_cast<test::TestTensorEncodingAttr>(tensorType.getEncoding())) {
+ return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get(
+ tensorType.getContext(), encoding.getDummy()));
+ }
+ return {};
+}
+
+/// Auxiliary bufferization function for test and builtin tensors.
+bufferization::BufferLikeType
+convertTensorToBuffer(mlir::Operation *op,
+ const bufferization::BufferizationOptions &options,
+ bufferization::TensorLikeType tensorLike) {
+ auto buffer =
+ *tensorLike.getBufferType(options, [&]() { return op->emitError(); });
+ if (auto memref = dyn_cast<MemRefType>(buffer)) {
+ // Note: For the sake of testing, we want to ensure that encoding -> layout
+ // bufferization happens. This is currently achieved manually.
+ auto layout =
+ getMemRefLayoutForTensorEncoding(cast<RankedTensorType>(tensorLike));
+ return cast<bufferization::BufferLikeType>(
+ MemRefType::get(memref.getShape(), memref.getElementType(), layout,
+ memref.getMemorySpace()));
+ }
+ return buffer;
+}
+} // namespace
+
::mlir::LogicalResult test::TestDummyTensorOp::bufferize(
::mlir::RewriterBase &rewriter,
const ::mlir::bufferization::BufferizationOptions &options,
@@ -1435,8 +1468,8 @@ TestMultiSlotAlloca::handleDestructuringComplete(
return failure();
const auto outType = getOutput().getType();
- const auto bufferizedOutType = test::TestMemrefType::get(
- getContext(), outType.getShape(), outType.getElementType(), nullptr);
+ const auto bufferizedOutType =
+ convertTensorToBuffer(getOperation(), options, outType);
// replace op with memref analogy
auto dummyMemrefOp = test::TestDummyMemrefOp::create(
rewriter, getLoc(), bufferizedOutType, *buffer);
@@ -1470,13 +1503,12 @@ TestMultiSlotAlloca::handleDestructuringComplete(
mlir::FailureOr<mlir::bufferization::BufferLikeType>
test::TestCreateTensorOp::getBufferType(
- mlir::Value value, const mlir::bufferization::BufferizationOptions &,
+ mlir::Value value, const mlir::bufferization::BufferizationOptions &options,
const mlir::bufferization::BufferizationState &,
llvm::SmallVector<::mlir::Value> &) {
- const auto type = dyn_cast<test::TestTensorType>(value.getType());
+ const auto type = dyn_cast<bufferization::TensorLikeType>(value.getType());
if (type == nullptr)
return failure();
- return cast<mlir::bufferization::BufferLikeType>(test::TestMemrefType::get(
- getContext(), type.getShape(), type.getElementType(), nullptr));
+ return convertTensorToBuffer(getOperation(), options, type);
}
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 6329d61..05a33cf 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -32,6 +32,7 @@ include "mlir/Interfaces/MemorySlotInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ValueBoundsOpInterface.td"
include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td"
+include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td"
// Include the attribute definitions.
include "TestAttrDefs.td"
@@ -2335,7 +2336,7 @@ def SideEffectWithRegionOp : TEST_Op<"side_effect_with_region_op",
}
//===----------------------------------------------------------------------===//
-// Copy Operation Test
+// Copy Operation Test
//===----------------------------------------------------------------------===//
def CopyOp : TEST_Op<"copy", []> {
@@ -3676,10 +3677,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op",
["bufferize", "bufferizesToMemoryRead",
"bufferizesToMemoryWrite", "getAliasingValues"]>]> {
let arguments = (ins
- Arg<TestTensorType>:$input
+ Arg<Bufferization_TensorLikeTypeInterface>:$input
);
let results = (outs
- Arg<TestTensorType>:$output
+ Arg<Bufferization_TensorLikeTypeInterface>:$output
);
let extraClassDefinition = [{
@@ -3701,10 +3702,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op",
def TestDummyMemrefOp : TEST_Op<"dummy_memref_op", []> {
let arguments = (ins
- Arg<TestMemrefType>:$input
+ Arg<Bufferization_BufferLikeTypeInterface>:$input
);
let results = (outs
- Arg<TestMemrefType>:$output
+ Arg<Bufferization_BufferLikeTypeInterface>:$output
);
}
@@ -3714,7 +3715,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op",
"bufferizesToMemoryWrite", "getAliasingValues",
"bufferizesToAllocation"]>]> {
let arguments = (ins);
- let results = (outs Arg<TestTensorType>:$output);
+ let results = (outs Arg<Bufferization_TensorLikeTypeInterface>:$output);
let extraClassDefinition = [{
bool test::TestCreateTensorOp::bufferizesToMemoryRead(::mlir::OpOperand&,
const ::mlir::bufferization::AnalysisState&) {
@@ -3738,7 +3739,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op",
def TestCreateMemrefOp : TEST_Op<"create_memref_op"> {
let arguments = (ins);
- let results = (outs Arg<TestMemrefType>:$output);
+ let results = (outs Arg<Bufferization_BufferLikeTypeInterface>:$output);
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp
index 97fc699..496f18b 100644
--- a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp
+++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp
@@ -938,10 +938,10 @@ public:
// These are automatically generated by ODS but are not used as the Transform
// dialect uses a different dispatch mechanism to support dialect extensions.
-LLVM_ATTRIBUTE_UNUSED static OptionalParseResult
+[[maybe_unused]] static OptionalParseResult
generatedTypeParser(AsmParser &parser, StringRef *mnemonic, Type &value);
-LLVM_ATTRIBUTE_UNUSED static LogicalResult
-generatedTypePrinter(Type def, AsmPrinter &printer);
+[[maybe_unused]] static LogicalResult generatedTypePrinter(Type def,
+ AsmPrinter &printer);
#define GET_TYPEDEF_CLASSES
#include "TestTransformDialectExtensionTypes.cpp.inc"
diff --git a/mlir/test/lib/Pass/TestRemarksPass.cpp b/mlir/test/lib/Pass/TestRemarksPass.cpp
index 3b25686..5ca2d1a 100644
--- a/mlir/test/lib/Pass/TestRemarksPass.cpp
+++ b/mlir/test/lib/Pass/TestRemarksPass.cpp
@@ -43,7 +43,12 @@ public:
<< remark::add("This is a test missed remark")
<< remark::reason("because we are testing the remark pipeline")
<< remark::suggest("try using the remark pipeline feature");
-
+ mlir::remark::passed(
+ loc,
+ remark::RemarkOpts::name("test-remark").category("category-1-passed"))
+ << remark::add("This is a test passed remark (should be dropped)")
+ << remark::reason("because we are testing the remark pipeline")
+ << remark::suggest("try using the remark pipeline feature");
mlir::remark::passed(
loc,
remark::RemarkOpts::name("test-remark").category("category-1-passed"))
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index f99c24d..6ff12d6 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -348,6 +348,9 @@ if config.enable_assertions:
else:
config.available_features.add("noasserts")
+if config.expensive_checks:
+ config.available_features.add("expensive_checks")
+
def have_host_jit_feature_support(feature_name):
mlir_runner_exe = lit.util.which("mlir-runner", config.mlir_tools_dir)
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 1aaf798..91a71af 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -11,6 +11,7 @@ config.llvm_shlib_ext = "@SHLIBEXT@"
config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@"))
config.python_executable = "@Python3_EXECUTABLE@"
config.enable_assertions = @ENABLE_ASSERTIONS@
+config.expensive_checks = @EXPENSIVE_CHECKS@
config.native_target = "@LLVM_NATIVE_ARCH@"
config.host_os = "@HOST_OS@"
config.host_cc = "@HOST_CC@"
diff --git a/mlir/test/mlir-pdll/CodeGen/CPP/general.pdll b/mlir/test/mlir-pdll/CodeGen/CPP/general.pdll
index 4e869e5..4be30d8 100644
--- a/mlir/test/mlir-pdll/CodeGen/CPP/general.pdll
+++ b/mlir/test/mlir-pdll/CodeGen/CPP/general.pdll
@@ -28,7 +28,7 @@
// CHECK: operation "test.op3"
// CHECK: )mlir", context), std::forward<ConfigsT>(configs)...)
-// CHECK: static void LLVM_ATTRIBUTE_UNUSED populateGeneratedPDLLPatterns(::mlir::RewritePatternSet &patterns, ConfigsT &&...configs) {
+// CHECK{LITERAL}: [[maybe_unused]] static void populateGeneratedPDLLPatterns(::mlir::RewritePatternSet &patterns, ConfigsT &&...configs) {
// CHECK-NEXT: patterns.add<GeneratedPDLLPattern0>(patterns.getContext(), configs...);
// CHECK-NEXT: patterns.add<NamedPattern>(patterns.getContext(), configs...);
// CHECK-NEXT: patterns.add<GeneratedPDLLPattern1>(patterns.getContext(), configs...);
diff --git a/mlir/test/mlir-tblgen/cpp-class-comments.td b/mlir/test/mlir-tblgen/cpp-class-comments.td
index a896888..9dcf975 100644
--- a/mlir/test/mlir-tblgen/cpp-class-comments.td
+++ b/mlir/test/mlir-tblgen/cpp-class-comments.td
@@ -96,17 +96,14 @@ def EncodingTrait : AttrInterface<"EncodingTrait"> {
}];
let methods = [
];
-// ATTR-INTERFACE: namespace mlir
-// ATTR-INTERFACE-NEXT: namespace a
-// ATTR-INTERFACE-NEXT: namespace traits
+// ATTR-INTERFACE: namespace mlir::a::traits {
// ATTR-INTERFACE-NEXT: /// Common trait for all layouts.
// ATTR-INTERFACE-NEXT: class EncodingTrait;
}
def SimpleEncodingTrait : AttrInterface<"SimpleEncodingTrait"> {
let cppNamespace = "a::traits";
-// ATTR-INTERFACE: namespace a {
-// ATTR-INTERFACE-NEXT: namespace traits {
+// ATTR-INTERFACE: namespace a::traits {
// ATTR-INTERFACE-NEXT: class SimpleEncodingTrait;
}
@@ -116,8 +113,7 @@ def SimpleOpInterface : OpInterface<"SimpleOpInterface"> {
Simple Op Interface description
}];
-// OP-INTERFACE: namespace a {
-// OP-INTERFACE-NEXT: namespace traits {
+// OP-INTERFACE: namespace a::traits {
// OP-INTERFACE-NEXT: /// Simple Op Interface description
// OP-INTERFACE-NEXT: class SimpleOpInterface;
}
diff --git a/mlir/test/mlir-tblgen/dialect.td b/mlir/test/mlir-tblgen/dialect.td
index f35ce34..9b45495 100644
--- a/mlir/test/mlir-tblgen/dialect.td
+++ b/mlir/test/mlir-tblgen/dialect.td
@@ -62,9 +62,14 @@ def E_SpecialNSOp : Op<E_Dialect, "special_ns_op", []> {
// DEF: ::E::SPECIAL_NS::SpecialNSOp definitions
// DECL-LABEL: GET_OP_CLASSES
+// DECL: namespace a {
// DECL: a::SomeOp declarations
+// DECL: namespace BNS {
// DECL: BNS::SomeOp declarations
+// DECL: namespace C::CC {
// DECL: ::C::CC::SomeOp declarations
// DECL: DSomeOp declarations
+// DECL: namespace ENS {
// DECL: ENS::SomeOp declarations
+// DECL: namespace E::SPECIAL_NS {
// DECL: ::E::SPECIAL_NS::SpecialNSOp declarations
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index 26ee9f3..66c4018 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -1,6 +1,7 @@
# RUN: %PYTHON %s | FileCheck %s
from mlir.ir import *
+import mlir.ir as ir
import mlir.dialects.gpu as gpu
import mlir.dialects.gpu.passes
from mlir.passmanager import *
@@ -64,3 +65,95 @@ def testObjectAttr():
# CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
print(o)
assert o.kernels == kernelTable
+
+
+# CHECK-LABEL: testGPUFuncOp
+@run
+def testGPUFuncOp():
+ assert gpu.GPUFuncOp.__doc__ is not None
+ module = Module.create()
+ with InsertionPoint(module.body):
+ gpu_module_name = StringAttr.get("gpu_module")
+ gpumodule = gpu.GPUModuleOp(gpu_module_name)
+ block = gpumodule.bodyRegion.blocks.append()
+
+ def builder(func: gpu.GPUFuncOp) -> None:
+ gpu.GlobalIdOp(gpu.Dimension.x)
+ gpu.ReturnOp([])
+
+ with InsertionPoint(block):
+ name = StringAttr.get("kernel0")
+ func_type = ir.FunctionType.get(inputs=[], results=[])
+ type_attr = TypeAttr.get(func_type)
+ func = gpu.GPUFuncOp(type_attr, name)
+ func.attributes["sym_name"] = name
+ func.attributes["gpu.kernel"] = UnitAttr.get()
+
+ try:
+ func.entry_block
+ assert False, "Expected RuntimeError"
+ except RuntimeError as e:
+ assert (
+ str(e)
+ == "Entry block does not exist for kernel0. Do you need to call the add_entry_block() method on this GPUFuncOp?"
+ )
+
+ block = func.add_entry_block()
+ with InsertionPoint(block):
+ builder(func)
+
+ try:
+ func.add_entry_block()
+ assert False, "Expected RuntimeError"
+ except RuntimeError as e:
+ assert str(e) == "Entry block already exists for kernel0"
+
+ func = gpu.GPUFuncOp(
+ func_type,
+ sym_name="kernel1",
+ kernel=True,
+ body_builder=builder,
+ known_block_size=[1, 2, 3],
+ known_grid_size=DenseI32ArrayAttr.get([4, 5, 6]),
+ )
+
+ assert func.name.value == "kernel1"
+ assert func.function_type.value == func_type
+    assert func.arg_attrs is None
+    assert func.res_attrs is None
+ assert func.arguments == []
+ assert func.entry_block == func.body.blocks[0]
+ assert func.is_kernel
+ assert func.known_block_size == DenseI32ArrayAttr.get(
+ [1, 2, 3]
+ ), func.known_block_size
+ assert func.known_grid_size == DenseI32ArrayAttr.get(
+ [4, 5, 6]
+ ), func.known_grid_size
+
+ func = gpu.GPUFuncOp(
+ func_type,
+ sym_name="non_kernel_func",
+ body_builder=builder,
+ )
+ assert not func.is_kernel
+ assert func.known_block_size is None
+ assert func.known_grid_size is None
+
+ print(module)
+
+ # CHECK: gpu.module @gpu_module
+ # CHECK: gpu.func @kernel0() kernel {
+ # CHECK: %[[VAL_0:.*]] = gpu.global_id x
+ # CHECK: gpu.return
+ # CHECK: }
+ # CHECK: gpu.func @kernel1() kernel attributes
+ # CHECK-SAME: known_block_size = array<i32: 1, 2, 3>
+ # CHECK-SAME: known_grid_size = array<i32: 4, 5, 6>
+ # CHECK: %[[VAL_0:.*]] = gpu.global_id x
+ # CHECK: gpu.return
+ # CHECK: }
+ # CHECK: gpu.func @non_kernel_func() {
+ # CHECK: %[[VAL_0:.*]] = gpu.global_id x
+ # CHECK: gpu.return
+ # CHECK: }
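Note: as a condensed reference for the keyword-driven construction path exercised above, the following hedged sketch builds a one-kernel module in a single call. It assumes the in-tree MLIR Python bindings this patch extends; the module and kernel names are illustrative only.

from mlir.ir import Context, FunctionType, InsertionPoint, Location, Module, StringAttr
import mlir.dialects.gpu as gpu

with Context(), Location.unknown():
    module = Module.create()
    with InsertionPoint(module.body):
        gpu_module = gpu.GPUModuleOp(StringAttr.get("demo_module"))
        block = gpu_module.bodyRegion.blocks.append()
        with InsertionPoint(block):
            # body_builder runs inside the freshly created entry block, so no
            # separate add_entry_block() call is needed.
            gpu.GPUFuncOp(
                FunctionType.get(inputs=[], results=[]),
                sym_name="demo_kernel",
                kernel=True,
                body_builder=lambda f: gpu.ReturnOp([]),
                known_block_size=[64, 1, 1],
            )
    print(module)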
diff --git a/mlir/test/python/dialects/openacc.py b/mlir/test/python/dialects/openacc.py
new file mode 100644
index 0000000..8f2142a
--- /dev/null
+++ b/mlir/test/python/dialects/openacc.py
@@ -0,0 +1,170 @@
+# RUN: %PYTHON %s | FileCheck %s
+from mlir.ir import (
+ Context,
+ FunctionType,
+ Location,
+ Module,
+ InsertionPoint,
+ IntegerType,
+ IndexType,
+ MemRefType,
+ F32Type,
+ Block,
+ ArrayAttr,
+ Attribute,
+ UnitAttr,
+ StringAttr,
+ DenseI32ArrayAttr,
+ ShapedType,
+)
+from mlir.dialects import openacc, func, arith, memref
+from mlir.extras import types
+
+
+def run(f):
+ print("\n// TEST:", f.__name__)
+ with Context(), Location.unknown():
+ f()
+ return f
+
+
+@run
+def testParallelMemcpy():
+ module = Module.create()
+
+ dynamic = ShapedType.get_dynamic_size()
+ memref_f32_1d_any = MemRefType.get([dynamic], types.f32())
+
+ with InsertionPoint(module.body):
+ function_type = FunctionType.get(
+ [memref_f32_1d_any, memref_f32_1d_any, types.i64()], []
+ )
+ f = func.FuncOp(
+ type=function_type,
+ name="memcpy_idiom",
+ )
+ f.attributes["sym_visibility"] = StringAttr.get("public")
+
+ with InsertionPoint(f.add_entry_block()):
+ c1024 = arith.ConstantOp(types.i32(), 1024)
+ c128 = arith.ConstantOp(types.i32(), 128)
+
+ arg0, arg1, arg2 = f.arguments
+
+ copied = openacc.copyin(
+ acc_var=arg0.type,
+ var=arg0,
+ var_type=types.f32(),
+ bounds=[],
+ async_operands=[],
+ implicit=False,
+ structured=True,
+ )
+ created = openacc.create_(
+ acc_var=arg1.type,
+ var=arg1,
+ var_type=types.f32(),
+ bounds=[],
+ async_operands=[],
+ implicit=False,
+ structured=True,
+ )
+
+ parallel_op = openacc.ParallelOp(
+ asyncOperands=[],
+ waitOperands=[],
+ numGangs=[c1024],
+ numWorkers=[],
+ vectorLength=[c128],
+ reductionOperands=[],
+ privateOperands=[],
+ firstprivateOperands=[],
+ dataClauseOperands=[],
+ )
+
+            # Set the required device_type and segment attributes to satisfy the verifier.
+ acc_device_none = ArrayAttr.get([Attribute.parse("#acc.device_type<none>")])
+ parallel_op.numGangsDeviceType = acc_device_none
+ parallel_op.numGangsSegments = DenseI32ArrayAttr.get([1])
+ parallel_op.vectorLengthDeviceType = acc_device_none
+
+ parallel_block = Block.create_at_start(parent=parallel_op.region, arg_types=[])
+
+ with InsertionPoint(parallel_block):
+ c0 = arith.ConstantOp(types.i64(), 0)
+ c1 = arith.ConstantOp(types.i64(), 1)
+
+ loop_op = openacc.LoopOp(
+ results_=[],
+ lowerbound=[c0],
+ upperbound=[f.arguments[2]],
+ step=[c1],
+ gangOperands=[],
+ workerNumOperands=[],
+ vectorOperands=[],
+ tileOperands=[],
+ cacheOperands=[],
+ privateOperands=[],
+ reductionOperands=[],
+ firstprivateOperands=[],
+ )
+
+ # Set loop attributes: gang and independent on device_type<none>
+ acc_device_none = ArrayAttr.get([Attribute.parse("#acc.device_type<none>")])
+ loop_op.gang = acc_device_none
+ loop_op.independent = acc_device_none
+
+ loop_block = Block.create_at_start(
+ parent=loop_op.region, arg_types=[types.i64()]
+ )
+
+ with InsertionPoint(loop_block):
+ idx = arith.index_cast(out=IndexType.get(), in_=loop_block.arguments[0])
+ val = memref.load(memref=copied, indices=[idx])
+ memref.store(value=val, memref=created, indices=[idx])
+ openacc.YieldOp([])
+
+ openacc.YieldOp([])
+
+ deleted = openacc.delete(
+ acc_var=copied,
+ bounds=[],
+ async_operands=[],
+ implicit=False,
+ structured=True,
+ )
+ copied = openacc.copyout(
+ acc_var=created,
+ var=arg1,
+ var_type=types.f32(),
+ bounds=[],
+ async_operands=[],
+ implicit=False,
+ structured=True,
+ )
+ func.ReturnOp([])
+
+ print(module)
+
+ # CHECK: TEST: testParallelMemcpy
+ # CHECK-LABEL: func.func public @memcpy_idiom(
+ # CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: i64) {
+ # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1024 : i32
+ # CHECK: %[[CONSTANT_1:.*]] = arith.constant 128 : i32
+ # CHECK: %[[COPYIN_0:.*]] = acc.copyin varPtr(%[[ARG0]] : memref<?xf32>) -> memref<?xf32>
+ # CHECK: %[[CREATE_0:.*]] = acc.create varPtr(%[[ARG1]] : memref<?xf32>) -> memref<?xf32>
+ # CHECK: acc.parallel num_gangs({%[[CONSTANT_0]] : i32}) vector_length(%[[CONSTANT_1]] : i32) {
+ # CHECK: %[[CONSTANT_2:.*]] = arith.constant 0 : i64
+ # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i64
+ # CHECK: acc.loop gang control(%[[VAL_0:.*]] : i64) = (%[[CONSTANT_2]] : i64) to (%[[ARG2]] : i64) step (%[[CONSTANT_3]] : i64) {
+ # CHECK: %[[INDEX_CAST_0:.*]] = arith.index_cast %[[VAL_0]] : i64 to index
+ # CHECK: %[[LOAD_0:.*]] = memref.load %[[COPYIN_0]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32>
+ # CHECK: memref.store %[[LOAD_0]], %[[CREATE_0]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32>
+ # CHECK: acc.yield
+ # CHECK: } attributes {independent = [#acc.device_type<none>]}
+ # CHECK: acc.yield
+ # CHECK: }
+ # CHECK: acc.delete accPtr(%[[COPYIN_0]] : memref<?xf32>)
+ # CHECK: acc.copyout accPtr(%[[CREATE_0]] : memref<?xf32>) to varPtr(%[[ARG1]] : memref<?xf32>)
+ # CHECK: return
+ # CHECK: }
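Note: the verifier-driven step in the test above is worth calling out on its own: acc.parallel requires operand groups such as num_gangs to carry matching device_type and segment attributes. Below is a minimal hedged sketch of just that step, assuming an active Context and the same bindings as the test (the helper name is hypothetical).

from mlir.ir import ArrayAttr, Attribute, DenseI32ArrayAttr

def attach_num_gangs_attrs(parallel_op):
    # One device_type entry per operand group; #acc.device_type<none> is the
    # default variant used by the test above.
    device_none = ArrayAttr.get([Attribute.parse("#acc.device_type<none>")])
    parallel_op.numGangsDeviceType = device_none
    # A single segment of length 1: one num_gangs value for that device type.
    parallel_op.numGangsSegments = DenseI32ArrayAttr.get([1])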
diff --git a/mlir/test/python/dialects/transform_structured_ext.py b/mlir/test/python/dialects/transform_structured_ext.py
index 8785d6d..d6b70dc 100644
--- a/mlir/test/python/dialects/transform_structured_ext.py
+++ b/mlir/test/python/dialects/transform_structured_ext.py
@@ -109,13 +109,29 @@ def testFuseOpCompact(target):
)
# CHECK-LABEL: TEST: testFuseOpCompact
# CHECK: transform.sequence
- # CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.fuse %{{.*}}[4, 8]
- # CHECK-SAME: interchange [0, 1] apply_cleanup = true
+ # CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.fuse %{{.*}} tile_sizes [4, 8]
+ # CHECK-SAME: interchange [0, 1] {apply_cleanup}
# CHECK-SAME: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
@run
@create_sequence
+def testFuseOpCompactForall(target):
+ structured.FuseOp(
+ target,
+ tile_sizes=[4, 8],
+ apply_cleanup=True,
+ use_forall=True,
+ )
+    # CHECK-LABEL: TEST: testFuseOpCompactForall
+ # CHECK: transform.sequence
+ # CHECK: %{{.+}}, %{{.+}} = transform.structured.fuse %{{.*}} tile_sizes [4, 8]
+ # CHECK-SAME: {apply_cleanup, use_forall}
+ # CHECK-SAME: (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+
+
+@run
+@create_sequence
def testFuseOpNoArg(target):
structured.FuseOp(target)
# CHECK-LABEL: TEST: testFuseOpNoArg
@@ -126,13 +142,51 @@ def testFuseOpNoArg(target):
@run
@create_sequence
+def testFuseOpParams(target):
+ structured.FuseOp(
+ target,
+ tile_sizes=[constant_param(4), Attribute.parse("8")],
+ tile_interchange=[constant_param(0), Attribute.parse("1")],
+ )
+ # CHECK-LABEL: TEST: testFuseOpParams
+ # CHECK: transform.sequence
+ # CHECK-DAG: %[[P:.*]] = transform.param.constant 4
+ # CHECK-DAG: %[[I:.*]] = transform.param.constant 0
+ # CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.fuse
+ # CHECK-SAME: tile_sizes [%[[P]], 8]
+ # CHECK-SAME: interchange [%[[I]], 1]
+ # CHECK-SAME: (!transform.any_op, !transform.param<i64>, !transform.param<i64>) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+
+
+@run
+@create_sequence
+def testFuseOpHandles(target):
+ size1 = structured.MatchOp.match_op_names(target, ["arith.constant"])
+ ichange1 = structured.MatchOp.match_op_names(target, ["arith.constant"])
+ structured.FuseOp(
+ target,
+ tile_sizes=[size1, 8],
+ tile_interchange=[ichange1, 1],
+ )
+ # CHECK-LABEL: TEST: testFuseOpHandles
+ # CHECK: transform.sequence
+ # CHECK: %[[H:.*]] = transform.structured.match
+ # CHECK: %[[I:.*]] = transform.structured.match
+ # CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.fuse
+ # CHECK-SAME: tile_sizes [%[[H]], 8]
+ # CHECK-SAME: interchange [%[[I]], 1]
+ # CHECK-SAME: (!transform.any_op, !transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+
+
+@run
+@create_sequence
def testFuseOpAttributes(target):
attr = DenseI64ArrayAttr.get([4, 8])
ichange = DenseI64ArrayAttr.get([0, 1])
structured.FuseOp(target, tile_sizes=attr, tile_interchange=ichange)
# CHECK-LABEL: TEST: testFuseOpAttributes
# CHECK: transform.sequence
- # CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.fuse %{{.*}}[4, 8]
+ # CHECK: %{{.+}}, %{{.+}}:2 = transform.structured.fuse %{{.*}} tile_sizes [4, 8]
# CHECK-SAME: interchange [0, 1]
# CHECK-SAME: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
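Note: the CHECK updates above reflect two assembly changes: tile sizes are now introduced by an explicit tile_sizes keyword, and unit options print as a trailing {apply_cleanup, use_forall} attribute group. A minimal hedged sketch of the forall variant, assuming the same bindings and helpers as these tests:

from mlir.dialects.transform import structured

def build_fuse_forall(target):
    # Prints roughly as:
    #   %fused, %loop = transform.structured.fuse %target tile_sizes [4, 8]
    #       {apply_cleanup, use_forall} : (...) -> (...)
    return structured.FuseOp(
        target,
        tile_sizes=[4, 8],
        apply_cleanup=True,
        use_forall=True,  # tile with scf.forall: a single loop handle results
    )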
diff --git a/mlir/test/python/ir/operation.py b/mlir/test/python/ir/operation.py
index cb4cfc8c..1d4ede1 100644
--- a/mlir/test/python/ir/operation.py
+++ b/mlir/test/python/ir/operation.py
@@ -569,12 +569,30 @@ def testOperationAttributes():
# CHECK: Attribute value b'text'
print(f"Attribute value {sattr.value_bytes}")
+ # Python dict-style iteration
# We don't know in which order the attributes are stored.
- # CHECK-DAG: NamedAttribute(dependent="text")
- # CHECK-DAG: NamedAttribute(other.attribute=3.000000e+00 : f64)
- # CHECK-DAG: NamedAttribute(some.attribute=1 : i8)
- for attr in op.attributes:
- print(str(attr))
+ # CHECK-DAG: dependent
+ # CHECK-DAG: other.attribute
+ # CHECK-DAG: some.attribute
+ for name in op.attributes:
+ print(name)
+
+ # Basic dict-like introspection
+ # CHECK: True
+ print("some.attribute" in op.attributes)
+ # CHECK: False
+ print("missing" in op.attributes)
+ # CHECK: Keys: ['dependent', 'other.attribute', 'some.attribute']
+ print("Keys:", sorted(op.attributes.keys()))
+ # CHECK: Values count 3
+ print("Values count", len(op.attributes.values()))
+ # CHECK: Items count 3
+ print("Items count", len(op.attributes.items()))
+
+ # Dict() conversion test
+ d = {k: v.value for k, v in dict(op.attributes).items()}
+ # CHECK: Dict mapping {'dependent': 'text', 'other.attribute': 3.0, 'some.attribute': 1}
+ print("Dict mapping", d)
# Check that exceptions are raised as expected.
try:
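Note: the hunk above changes iteration over op.attributes to yield attribute names (dict-style) rather than NamedAttribute objects, and layers the usual mapping helpers on top. A standalone hedged sketch, using an unregistered test.op purely for illustration:

from mlir.ir import Context, Location, Module

with Context() as ctx, Location.unknown():
    ctx.allow_unregistered_dialects = True
    module = Module.parse('"test.op"() {a = 1 : i8, b = "text"} : () -> ()')
    op = module.body.operations[0]
    assert "a" in op.attributes and "missing" not in op.attributes
    assert sorted(op.attributes.keys()) == ["a", "b"]
    for name in op.attributes:  # iteration yields attribute names
        print(name, op.attributes[name])
    # dict() yields a plain name -> attribute mapping, as in the test above.
    print({k: v.value for k, v in dict(op.attributes).items()})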
diff --git a/mlir/test/python/rewrite.py b/mlir/test/python/rewrite.py
index acf7db2..821e470 100644
--- a/mlir/test/python/rewrite.py
+++ b/mlir/test/python/rewrite.py
@@ -17,15 +17,16 @@ def run(f):
def testRewritePattern():
def to_muli(op, rewriter):
with rewriter.ip:
- new_op = arith.muli(op.operands[0], op.operands[1], loc=op.location)
+ assert isinstance(op, arith.AddIOp)
+ new_op = arith.muli(op.lhs, op.rhs, loc=op.location)
rewriter.replace_op(op, new_op.owner)
def constant_1_to_2(op, rewriter):
- c = op.attributes["value"].value
+ c = op.value.value
if c != 1:
return True # failed to match
with rewriter.ip:
- new_op = arith.constant(op.result.type, 2, loc=op.location)
+ new_op = arith.constant(op.type, 2, loc=op.location)
rewriter.replace_op(op, [new_op])
with Context():