Diffstat (limited to 'mlir/test')
-rw-r--r--  mlir/test/Conversion/SCFToEmitC/while.mlir | 293
-rw-r--r--  mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir | 84
-rw-r--r--  mlir/test/Dialect/EmitC/invalid_ops.mlir | 149
-rw-r--r--  mlir/test/Dialect/EmitC/ops.mlir | 20
-rw-r--r--  mlir/test/Dialect/Linalg/decompose-pack.mlir | 21
-rw-r--r--  mlir/test/Dialect/Linalg/runtime-verification.mlir | 4
-rw-r--r--  mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir | 116
-rw-r--r--  mlir/test/Dialect/OpenACC/pointer-like-interface-alloc.mlir | 24
-rw-r--r--  mlir/test/Dialect/OpenACC/pointer-like-interface-copy.mlir | 23
-rw-r--r--  mlir/test/Dialect/OpenACC/pointer-like-interface-free.mlir | 31
-rw-r--r--  mlir/test/Dialect/Vector/canonicalize.mlir | 40
-rw-r--r--  mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir | 2
-rw-r--r--  mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir | 2
-rw-r--r--  mlir/test/Integration/Dialect/MemRef/cast-runtime-verification.mlir | 8
-rw-r--r--  mlir/test/Integration/Dialect/MemRef/copy-runtime-verification.mlir | 2
-rw-r--r--  mlir/test/Integration/Dialect/MemRef/dim-runtime-verification.mlir | 2
-rw-r--r--  mlir/test/Integration/Dialect/MemRef/load-runtime-verification.mlir | 6
-rw-r--r--  mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir | 2
-rw-r--r--  mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir | 16
-rw-r--r--  mlir/test/Integration/Dialect/Tensor/cast-runtime-verification.mlir | 4
-rw-r--r--  mlir/test/Integration/Dialect/Tensor/dim-runtime-verification.mlir | 2
-rw-r--r--  mlir/test/Integration/Dialect/Tensor/extract-runtime-verification.mlir | 6
-rw-r--r--  mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir | 16
-rw-r--r--  mlir/test/Target/Cpp/do.mlir | 168
-rw-r--r--  mlir/test/Target/LLVMIR/nvvm/convert_fp6x2.mlir | 8
-rw-r--r--  mlir/test/Target/LLVMIR/nvvm/convert_fp8x2.mlir | 44
-rw-r--r--  mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 42
-rw-r--r--  mlir/test/lib/Analysis/DataFlow/TestDenseBackwardDataFlowAnalysis.cpp | 72
-rw-r--r--  mlir/test/lib/Dialect/CMakeLists.txt | 1
-rw-r--r--  mlir/test/lib/Dialect/OpenACC/CMakeLists.txt | 16
-rw-r--r--  mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp | 23
-rw-r--r--  mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp | 305
-rw-r--r--  mlir/test/python/pass_manager.py | 20
33 files changed, 1490 insertions, 82 deletions
diff --git a/mlir/test/Conversion/SCFToEmitC/while.mlir b/mlir/test/Conversion/SCFToEmitC/while.mlir
new file mode 100644
index 0000000..28524a0
--- /dev/null
+++ b/mlir/test/Conversion/SCFToEmitC/while.mlir
@@ -0,0 +1,293 @@
+// RUN: mlir-opt -allow-unregistered-dialect -convert-scf-to-emitc %s | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect -convert-to-emitc="filter-dialects=scf" %s | FileCheck %s
+
+emitc.func @payload_one_result(%arg: i32) -> i32 {
+ %result = add %arg, %arg : (i32, i32) -> i32
+ return %result : i32
+}
+
+func.func @one_result() -> i32 {
+ %init = emitc.literal "1.0" : i32
+ %var = emitc.literal "1.0" : i32
+ %exit = emitc.literal "10.0" : i32
+
+ %res = scf.while (%arg1 = %init) : (i32) -> i32 {
+ %sum = emitc.add %arg1, %var : (i32, i32) -> i32
+ %condition = emitc.cmp lt, %sum, %exit : (i32, i32) -> i1
+ %next = emitc.add %arg1, %arg1 : (i32, i32) -> i32
+ scf.condition(%condition) %next : i32
+ } do {
+ ^bb0(%arg2: i32):
+ %next_arg1 = emitc.call @payload_one_result(%arg2) : (i32) -> i32
+ scf.yield %next_arg1 : i32
+ }
+
+ return %res : i32
+}
+// CHECK-LABEL: emitc.func @payload_one_result(
+// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
+// CHECK: %[[VAL_0:.*]] = add %[[ARG0]], %[[ARG0]] : (i32, i32) -> i32
+// CHECK: return %[[VAL_0]] : i32
+// CHECK: }
+
+// CHECK-LABEL: func.func @one_result() -> i32 {
+// CHECK: %[[VAL_0:.*]] = emitc.literal "1.0" : i32
+// CHECK: %[[VAL_1:.*]] = emitc.literal "1.0" : i32
+// CHECK: %[[VAL_2:.*]] = emitc.literal "10.0" : i32
+// CHECK: %[[VAL_3:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: %[[VAL_4:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: emitc.assign %[[VAL_0]] : i32 to %[[VAL_4]] : <i32>
+// CHECK: %[[VAL_5:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i1>
+// CHECK: emitc.do {
+// CHECK: %[[VAL_6:.*]] = load %[[VAL_4]] : <i32>
+// CHECK: %[[VAL_7:.*]] = add %[[VAL_6]], %[[VAL_1]] : (i32, i32) -> i32
+// CHECK: %[[VAL_8:.*]] = cmp lt, %[[VAL_7]], %[[VAL_2]] : (i32, i32) -> i1
+// CHECK: %[[VAL_9:.*]] = add %[[VAL_6]], %[[VAL_6]] : (i32, i32) -> i32
+// CHECK: assign %[[VAL_9]] : i32 to %[[VAL_3]] : <i32>
+// CHECK: assign %[[VAL_8]] : i1 to %[[VAL_5]] : <i1>
+// CHECK: if %[[VAL_8]] {
+// CHECK: %[[VAL_10:.*]] = call @payload_one_result(%[[VAL_9]]) : (i32) -> i32
+// CHECK: assign %[[VAL_10]] : i32 to %[[VAL_4]] : <i32>
+// CHECK: }
+// CHECK: } while {
+// CHECK: %[[VAL_11:.*]] = expression %[[VAL_5]] : (!emitc.lvalue<i1>) -> i1 {
+// CHECK: %[[VAL_12:.*]] = load %[[VAL_5]] : <i1>
+// CHECK: yield %[[VAL_12]] : i1
+// CHECK: }
+// CHECK: yield %[[VAL_11]] : i1
+// CHECK: }
+// CHECK: %[[VAL_13:.*]] = emitc.load %[[VAL_3]] : <i32>
+// CHECK: return %[[VAL_13]] : i32
+// CHECK: }
+
+emitc.func @payload_two_results(%arg: i32) -> i32 {
+ %result = add %arg, %arg : (i32, i32) -> i32
+ return %result : i32
+}
+
+func.func @two_results() -> i32 {
+ %init = emitc.literal "1.0" : i32
+ %exit = emitc.literal "10.0" : i32
+
+ %res1, %res2 = scf.while (%arg1_1 = %init, %arg1_2 = %init) : (i32, i32) -> (i32, i32) {
+ %sum = emitc.add %arg1_1, %arg1_2 : (i32, i32) -> i32
+ %condition = emitc.cmp lt, %sum, %exit : (i32, i32) -> i1
+ scf.condition(%condition) %init, %arg1_2 : i32, i32
+ } do {
+ ^bb0(%arg2_1 : i32, %arg2_2 : i32):
+ %next1 = emitc.call @payload_two_results(%arg2_1) : (i32) -> i32
+ %next2 = emitc.call @payload_two_results(%arg2_2) : (i32) -> i32
+ scf.yield %next1, %next2 : i32, i32
+ }
+
+ return %res1 : i32
+}
+// CHECK-LABEL: emitc.func @payload_two_results(
+// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
+// CHECK: %[[VAL_0:.*]] = add %[[ARG0]], %[[ARG0]] : (i32, i32) -> i32
+// CHECK: return %[[VAL_0]] : i32
+// CHECK: }
+
+// CHECK-LABEL: func.func @two_results() -> i32 {
+// CHECK: %[[VAL_0:.*]] = emitc.literal "1.0" : i32
+// CHECK: %[[VAL_1:.*]] = emitc.literal "10.0" : i32
+// CHECK: %[[VAL_2:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: %[[VAL_3:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: %[[VAL_4:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: emitc.assign %[[VAL_0]] : i32 to %[[VAL_4]] : <i32>
+// CHECK: %[[VAL_5:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: emitc.assign %[[VAL_0]] : i32 to %[[VAL_5]] : <i32>
+// CHECK: %[[VAL_6:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i1>
+// CHECK: emitc.do {
+// CHECK: %[[VAL_7:.*]] = load %[[VAL_4]] : <i32>
+// CHECK: %[[VAL_8:.*]] = load %[[VAL_5]] : <i32>
+// CHECK: %[[VAL_9:.*]] = add %[[VAL_7]], %[[VAL_8]] : (i32, i32) -> i32
+// CHECK: %[[VAL_10:.*]] = cmp lt, %[[VAL_9]], %[[VAL_1]] : (i32, i32) -> i1
+// CHECK: assign %[[VAL_0]] : i32 to %[[VAL_2]] : <i32>
+// CHECK: assign %[[VAL_8]] : i32 to %[[VAL_3]] : <i32>
+// CHECK: assign %[[VAL_10]] : i1 to %[[VAL_6]] : <i1>
+// CHECK: if %[[VAL_10]] {
+// CHECK: %[[VAL_11:.*]] = call @payload_two_results(%[[VAL_0]]) : (i32) -> i32
+// CHECK: %[[VAL_12:.*]] = call @payload_two_results(%[[VAL_8]]) : (i32) -> i32
+// CHECK: assign %[[VAL_11]] : i32 to %[[VAL_4]] : <i32>
+// CHECK: assign %[[VAL_12]] : i32 to %[[VAL_5]] : <i32>
+// CHECK: }
+// CHECK: } while {
+// CHECK: %[[VAL_13:.*]] = expression %[[VAL_6]] : (!emitc.lvalue<i1>) -> i1 {
+// CHECK: %[[VAL_14:.*]] = load %[[VAL_6]] : <i1>
+// CHECK: yield %[[VAL_14]] : i1
+// CHECK: }
+// CHECK: yield %[[VAL_13]] : i1
+// CHECK: }
+// CHECK: %[[VAL_15:.*]] = emitc.load %[[VAL_2]] : <i32>
+// CHECK: %[[VAL_16:.*]] = emitc.load %[[VAL_3]] : <i32>
+// CHECK: return %[[VAL_15]] : i32
+// CHECK: }
+
+emitc.func @payload_double_use(%arg: i32) -> i32 {
+ %result = add %arg, %arg : (i32, i32) -> i32
+ return %result : i32
+}
+
+emitc.func @foo_with_side_effect(%arg: i32, %p : !emitc.ptr<i32>) -> i32 {
+ %sum = add %arg, %arg : (i32, i32) -> i32
+ emitc.verbatim "{}[0] = {};" args %p, %sum : !emitc.ptr<i32>, i32
+ return %sum : i32
+}
+
+func.func @double_use(%p : !emitc.ptr<i32>) -> i32 {
+ %init = emitc.literal "1.0" : i32
+ %var = emitc.literal "1.0" : i32
+ %exit = emitc.literal "10.0" : i32
+ %res = scf.while (%arg1 = %init) : (i32) -> i32 {
+ %used_twice = emitc.call @foo_with_side_effect(%arg1, %p) : (i32, !emitc.ptr<i32>) -> i32
+ %prod = emitc.add %used_twice, %used_twice : (i32, i32) -> i32
+ %sum = emitc.add %arg1, %prod : (i32, i32) -> i32
+ %condition = emitc.cmp lt, %sum, %exit : (i32, i32) -> i1
+ scf.condition(%condition) %arg1 : i32
+ } do {
+ ^bb0(%arg2: i32):
+ %next_arg1 = emitc.call @payload_double_use(%arg2) : (i32) -> i32
+ scf.yield %next_arg1 : i32
+ }
+ return %res : i32
+}
+// CHECK-LABEL: emitc.func @payload_double_use(
+// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
+// CHECK: %[[VAL_0:.*]] = add %[[ARG0]], %[[ARG0]] : (i32, i32) -> i32
+// CHECK: return %[[VAL_0]] : i32
+// CHECK: }
+
+// CHECK-LABEL: emitc.func @foo_with_side_effect(
+// CHECK-SAME: %[[ARG0:.*]]: i32,
+// CHECK-SAME: %[[ARG1:.*]]: !emitc.ptr<i32>) -> i32 {
+// CHECK: %[[VAL_0:.*]] = add %[[ARG0]], %[[ARG0]] : (i32, i32) -> i32
+// CHECK: verbatim "{}[0] = {};" args %[[ARG1]], %[[VAL_0]] : !emitc.ptr<i32>, i32
+// CHECK: return %[[VAL_0]] : i32
+// CHECK: }
+
+// CHECK-LABEL: func.func @double_use(
+// CHECK-SAME: %[[ARG0:.*]]: !emitc.ptr<i32>) -> i32 {
+// CHECK: %[[VAL_0:.*]] = emitc.literal "1.0" : i32
+// CHECK: %[[VAL_1:.*]] = emitc.literal "1.0" : i32
+// CHECK: %[[VAL_2:.*]] = emitc.literal "10.0" : i32
+// CHECK: %[[VAL_3:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: %[[VAL_4:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: emitc.assign %[[VAL_0]] : i32 to %[[VAL_4]] : <i32>
+// CHECK: %[[VAL_5:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i1>
+// CHECK: emitc.do {
+// CHECK: %[[VAL_6:.*]] = load %[[VAL_4]] : <i32>
+// CHECK: %[[VAL_7:.*]] = call @foo_with_side_effect(%[[VAL_6]], %[[ARG0]]) : (i32, !emitc.ptr<i32>) -> i32
+// CHECK: %[[VAL_8:.*]] = add %[[VAL_7]], %[[VAL_7]] : (i32, i32) -> i32
+// CHECK: %[[VAL_9:.*]] = add %[[VAL_6]], %[[VAL_8]] : (i32, i32) -> i32
+// CHECK: %[[VAL_10:.*]] = cmp lt, %[[VAL_9]], %[[VAL_2]] : (i32, i32) -> i1
+// CHECK: assign %[[VAL_6]] : i32 to %[[VAL_3]] : <i32>
+// CHECK: assign %[[VAL_10]] : i1 to %[[VAL_5]] : <i1>
+// CHECK: if %[[VAL_10]] {
+// CHECK: %[[VAL_11:.*]] = call @payload_double_use(%[[VAL_6]]) : (i32) -> i32
+// CHECK: assign %[[VAL_11]] : i32 to %[[VAL_4]] : <i32>
+// CHECK: }
+// CHECK: } while {
+// CHECK: %[[VAL_12:.*]] = expression %[[VAL_5]] : (!emitc.lvalue<i1>) -> i1 {
+// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]] : <i1>
+// CHECK: yield %[[VAL_13]] : i1
+// CHECK: }
+// CHECK: yield %[[VAL_12]] : i1
+// CHECK: }
+// CHECK: %[[VAL_14:.*]] = emitc.load %[[VAL_3]] : <i32>
+// CHECK: return %[[VAL_14]] : i32
+// CHECK: }
+
+emitc.func @payload_empty_after_region() -> i1 {
+ %true = emitc.literal "true" : i1
+ return %true : i1
+}
+
+func.func @empty_after_region() {
+ scf.while () : () -> () {
+ %condition = emitc.call @payload_empty_after_region() : () -> i1
+ scf.condition(%condition)
+ } do {
+ ^bb0():
+ scf.yield
+ }
+ return
+}
+// CHECK-LABEL: emitc.func @payload_empty_after_region() -> i1 {
+// CHECK: %[[VAL_0:.*]] = literal "true" : i1
+// CHECK: return %[[VAL_0]] : i1
+// CHECK: }
+
+// CHECK-LABEL: func.func @empty_after_region() {
+// CHECK: %[[VAL_0:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i1>
+// CHECK: emitc.do {
+// CHECK: %[[VAL_1:.*]] = call @payload_empty_after_region() : () -> i1
+// CHECK: assign %[[VAL_1]] : i1 to %[[VAL_0]] : <i1>
+// CHECK: } while {
+// CHECK: %[[VAL_2:.*]] = expression %[[VAL_0]] : (!emitc.lvalue<i1>) -> i1 {
+// CHECK: %[[VAL_3:.*]] = load %[[VAL_0]] : <i1>
+// CHECK: yield %[[VAL_3]] : i1
+// CHECK: }
+// CHECK: yield %[[VAL_2]] : i1
+// CHECK: }
+// CHECK: return
+// CHECK: }
+
+emitc.func @payload_different_number_of_vars(%arg0: i32) -> i32 {
+ %0 = add %arg0, %arg0 : (i32, i32) -> i32
+ return %0 : i32
+}
+func.func @different_number_of_vars() -> (i32, i32) {
+ %init = emitc.literal "1.0" : i32
+ %var = emitc.literal "7.0" : i32
+ %exit = emitc.literal "10.0" : i32
+ %res, %res2 = scf.while (%arg1 = %init) : (i32) -> (i32, i32) {
+ %sum = emitc.add %arg1, %var : (i32, i32) -> i32
+ %condition = emitc.cmp lt, %sum, %exit : (i32, i32) -> i1
+ %next = emitc.add %arg1, %arg1 : (i32, i32) -> i32
+ scf.condition(%condition) %next, %sum : i32, i32
+ } do {
+ ^bb0(%arg2: i32, %arg3 : i32):
+ %next_arg1 = emitc.call @payload_different_number_of_vars(%arg2) : (i32) -> i32
+ scf.yield %next_arg1 : i32
+ }
+ return %res, %res2 : i32, i32
+}
+// CHECK-LABEL: emitc.func @payload_different_number_of_vars(
+// CHECK-SAME: %[[ARG0:.*]]: i32) -> i32 {
+// CHECK: %[[VAL_0:.*]] = add %[[ARG0]], %[[ARG0]] : (i32, i32) -> i32
+// CHECK: return %[[VAL_0]] : i32
+// CHECK: }
+
+// CHECK-LABEL: func.func @different_number_of_vars() -> (i32, i32) {
+// CHECK: %[[VAL_0:.*]] = emitc.literal "1.0" : i32
+// CHECK: %[[VAL_1:.*]] = emitc.literal "7.0" : i32
+// CHECK: %[[VAL_2:.*]] = emitc.literal "10.0" : i32
+// CHECK: %[[VAL_3:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: %[[VAL_4:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: %[[VAL_5:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i32>
+// CHECK: emitc.assign %[[VAL_0]] : i32 to %[[VAL_5]] : <i32>
+// CHECK: %[[VAL_6:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.lvalue<i1>
+// CHECK: emitc.do {
+// CHECK: %[[VAL_7:.*]] = load %[[VAL_5]] : <i32>
+// CHECK: %[[VAL_8:.*]] = add %[[VAL_7]], %[[VAL_1]] : (i32, i32) -> i32
+// CHECK: %[[VAL_9:.*]] = cmp lt, %[[VAL_8]], %[[VAL_2]] : (i32, i32) -> i1
+// CHECK: %[[VAL_10:.*]] = add %[[VAL_7]], %[[VAL_7]] : (i32, i32) -> i32
+// CHECK: assign %[[VAL_10]] : i32 to %[[VAL_3]] : <i32>
+// CHECK: assign %[[VAL_8]] : i32 to %[[VAL_4]] : <i32>
+// CHECK: assign %[[VAL_9]] : i1 to %[[VAL_6]] : <i1>
+// CHECK: if %[[VAL_9]] {
+// CHECK: %[[VAL_11:.*]] = call @payload_different_number_of_vars(%[[VAL_10]]) : (i32) -> i32
+// CHECK: assign %[[VAL_11]] : i32 to %[[VAL_5]] : <i32>
+// CHECK: }
+// CHECK: } while {
+// CHECK: %[[VAL_12:.*]] = expression %[[VAL_6]] : (!emitc.lvalue<i1>) -> i1 {
+// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]] : <i1>
+// CHECK: yield %[[VAL_13]] : i1
+// CHECK: }
+// CHECK: yield %[[VAL_12]] : i1
+// CHECK: }
+// CHECK: %[[VAL_14:.*]] = emitc.load %[[VAL_3]] : <i32>
+// CHECK: %[[VAL_15:.*]] = emitc.load %[[VAL_4]] : <i32>
+// CHECK: return %[[VAL_14]], %[[VAL_15]] : i32, i32
+// CHECK: }
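
The shape the new scf.while lowering produces is easiest to read as C. A hand-written sketch of @one_result after further translation through the Cpp emitter might look like the following (variable names are invented, and the test's opaque "1.0"/"10.0" literals are replaced with plain integers):

#include <stdbool.h>
#include <stdint.h>

int32_t payload_one_result(int32_t v) { return v + v; }

int32_t one_result(void) {
  int32_t result;       // result variable (VAL_3 above)
  int32_t iter = 1;     // loop-carried variable (VAL_4), seeded from %init
  bool cond;            // condition variable (VAL_5)
  do {
    int32_t sum = iter + 1;        // %sum = emitc.add %arg1, %var
    bool c = sum < 10;             // %condition = emitc.cmp lt, %sum, %exit
    int32_t next = iter + iter;    // value forwarded by scf.condition
    result = next;                 // persist the while result
    cond = c;                      // persist the continuation flag
    if (c)                         // the scf.while "do" region runs only when continuing
      iter = payload_one_result(next);
  } while (cond);
  return result;
}

The before-region of scf.while always executes at least once per iteration, which is why the conversion targets a do-while rather than a while loop.
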
diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
index 8f60a07..b31a973 100644
--- a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
+++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir
@@ -261,3 +261,87 @@ llvm.func @llvm.store(%a: !llvm.ptr<1>, %val: i32) {
llvm.store %val, %a {cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>} : i32, !llvm.ptr<1>
llvm.return
}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t
+// CHECK: llvm.func @blockload_as1(%[[ARG0:.*]]: !llvm.ptr<1>)
+llvm.func @blockload_as1(%ptr: !llvm.ptr<1>) -> vector<8xi16> {
+ // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t(%[[ARG0]])
+ // CHECK-SAME: {function_type = !llvm.func<vector<8xi16> (ptr<1>)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z30intel_sub_group_block_read_us8PU3AS1t",
+ // CHECK-SAME: visibility_ = 0 : i64, will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
+ // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+ %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<1>) -> vector<8xi16>
+ llvm.return %loaded_a : vector<8xi16>
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(!llvm.ptr<3>)
+// CHECK: llvm.func @blockload_as3(%[[ARG0:.*]]: !llvm.ptr<3>)
+llvm.func @blockload_as3(%ptr: !llvm.ptr<3>) -> vector<16xi8> {
+ // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(%[[ARG0]])
+ // CHECK-SAME: {function_type = !llvm.func<vector<16xi8> (ptr<3>)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_read_uc16PU3AS3h", visibility_ = 0 : i64,
+ // CHECK-SAME: will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
+ // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+ %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<3>) -> vector<16xi8>
+ llvm.return %loaded_a : vector<16xi8>
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z29intel_sub_group_block_read_ucPU3AS3h(!llvm.ptr<3>)
+// CHECK: llvm.func @blockload_scalar(%[[ARG0:.*]]: !llvm.ptr<3>)
+llvm.func @blockload_scalar(%ptr: !llvm.ptr<3>) -> i8 {
+ // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z29intel_sub_group_block_read_ucPU3AS3h(%[[ARG0]])
+ // CHECK-SAME: {function_type = !llvm.func<i8 (ptr<3>)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z29intel_sub_group_block_read_ucPU3AS3h", visibility_ = 0 : i64,
+ // CHECK-SAME: will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32],
+ // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32]
+ %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control<L1uc_L2uc_L3uc>}> : (!llvm.ptr<3>) -> i8
+ llvm.return %loaded_a : i8
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j
+// CHECK: llvm.func @blockstore_as1(%[[ARG0:.*]]: !llvm.ptr<1>, %[[ARG1:.*]]: vector<8xi32>) {
+llvm.func @blockstore_as1(%ptr: !llvm.ptr<1>, %data: vector<8xi32>) {
+ // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j(%[[ARG0]], %[[ARG1]])
+ // CHECK-SAME: {function_type = !llvm.func<void (ptr<1>, vector<8xi32>)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j", visibility_ = 0 : i64,
+ // CHECK-SAME: will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
+ // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+ xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<1>, vector<8xi32>)
+ llvm.return
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m
+// CHECK: llvm.func @blockstore_as3(%[[ARG0:.*]]: !llvm.ptr<3>, %[[ARG1:.*]]: vector<2xi64>) {
+llvm.func @blockstore_as3(%ptr: !llvm.ptr<3>, %data: vector<2xi64>) {
+ // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m(%[[ARG0]], %[[ARG1]])
+ // CHECK-SAME: {function_type = !llvm.func<void (ptr<3>, vector<2xi64>)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m", visibility_ = 0 : i64,
+ // CHECK-SAME: will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
+ // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+ xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<3>, vector<2xi64>)
+ llvm.return
+}
+
+// -----
+// CHECK-LABEL: llvm.func spir_funccc @_Z30intel_sub_group_block_write_ulPU3AS3mm
+// CHECK: llvm.func @blockstore_scalar(%[[ARG0:.*]]: !llvm.ptr<3>, %[[ARG1:.*]]: i64) {
+llvm.func @blockstore_scalar(%ptr: !llvm.ptr<3>, %data: i64) {
+ // CHECK: llvm.call spir_funccc @_Z30intel_sub_group_block_write_ulPU3AS3mm(%[[ARG0]], %[[ARG1]])
+ // CHECK-SAME: {function_type = !llvm.func<void (ptr<3>, i64)>, linkage = #llvm.linkage<external>,
+ // CHECK-SAME: no_unwind, sym_name = "_Z30intel_sub_group_block_write_ulPU3AS3mm", visibility_ = 0 : i64,
+ // CHECK-SAME: will_return, xevm.DecorationCacheControl =
+ // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32],
+ // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32]
+ xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control<L1wt_L2uc_L3wb>}> : (!llvm.ptr<3>, i64)
+ llvm.return
+}
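
The spir_funccc callees above are the cl_intel_subgroups block read/write builtins under Itanium-mangled names; a hand-decoded reading of the first one, for reference when scanning the CHECK lines (this is not output from the test):

/* _Z30intel_sub_group_block_read_us8PU3AS1t
 *   _Z      Itanium mangling prefix
 *   30      length of the identifier that follows
 *   intel_sub_group_block_read_us8
 *   P       pointer to ...
 *   U3AS1   ... with vendor qualifier "AS1" (address space 1, i.e. __global)
 *   t       unsigned short
 * Other codes used in this file: 'h' unsigned char, 'j' unsigned int,
 * 'm' unsigned long, 'Dv8_j' vector of 8 unsigned ints, AS3 = __local.
 * OpenCL view of the same signature:
 *   ushort8 intel_sub_group_block_read_us8(const __global ushort *p);
 */
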
diff --git a/mlir/test/Dialect/EmitC/invalid_ops.mlir b/mlir/test/Dialect/EmitC/invalid_ops.mlir
index f4c15f5..5f594fb 100644
--- a/mlir/test/Dialect/EmitC/invalid_ops.mlir
+++ b/mlir/test/Dialect/EmitC/invalid_ops.mlir
@@ -252,7 +252,7 @@ func.func @sub_pointer_pointer(%arg0: !emitc.ptr<f32>, %arg1: !emitc.ptr<f32>) {
// -----
func.func @test_misplaced_yield() {
- // expected-error @+1 {{'emitc.yield' op expects parent op to be one of 'emitc.expression, emitc.if, emitc.for, emitc.switch'}}
+ // expected-error @+1 {{'emitc.yield' op expects parent op to be one of 'emitc.do, emitc.expression, emitc.for, emitc.if, emitc.switch'}}
emitc.yield
return
}
@@ -729,3 +729,150 @@ emitc.class @testClass {
return
}
}
+
+// -----
+
+func.func @test_do(%arg0 : !emitc.ptr<i32>) {
+ %1 = emitc.literal "1" : i32
+ %2 = emitc.literal "2" : i32
+
+ // expected-error @+1 {{'emitc.do' op condition region must contain exactly two operations: 'emitc.expression' followed by 'emitc.yield', but found 3 operations}}
+ emitc.do {
+ emitc.verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ } while {
+ %r = emitc.expression %1, %2 : (i32, i32) -> i1 {
+ %cmp = emitc.cmp eq, %1, %2 : (i32, i32) -> i1
+ emitc.yield %cmp : i1
+ }
+
+ %3 = emitc.literal "3" : i32
+ emitc.yield %r : i1
+ }
+
+ return
+}
+
+// -----
+
+func.func @test_do(%arg0 : !emitc.ptr<i32>) {
+ // expected-error @+1 {{'emitc.do' op expected first op in condition region to be 'emitc.expression', but got emitc.literal}}
+ emitc.do {
+ emitc.verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ } while {
+ %true = emitc.literal "true" : i1
+ emitc.yield %true : i1
+ }
+
+ return
+}
+
+// -----
+
+func.func @test_do(%arg0 : !emitc.ptr<i32>) {
+ %1 = emitc.literal "1" : i32
+ %2 = emitc.literal "2" : i32
+
+ // expected-error @+1 {{'emitc.do' op emitc.expression in condition region must return 'i1', but returns 'i32'}}
+ emitc.do {
+ emitc.verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ } while {
+ %r = emitc.expression %1, %2 : (i32, i32) -> i32 {
+ %add = emitc.add %1, %2 : (i32, i32) -> i32
+ emitc.yield %add : i32
+ }
+
+ emitc.yield %r : i32
+ }
+
+ return
+}
+
+// -----
+
+func.func @test_do(%arg0 : !emitc.ptr<i32>) {
+ %1 = emitc.literal "1" : i32
+ %2 = emitc.literal "2" : i32
+
+ // expected-error @+1 {{'emitc.do' op expected last op in condition region to be 'emitc.yield', but got emitc.expression}}
+ emitc.do {
+ emitc.verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ } while {
+ %r1 = emitc.expression %1, %2 : (i32, i32) -> i1 {
+ %cmp = emitc.cmp eq, %1, %2 : (i32, i32) -> i1
+ emitc.yield %cmp : i1
+ }
+
+ %r2 = emitc.expression %1, %2 : (i32, i32) -> i32 {
+ %add = emitc.add %1, %2 : (i32, i32) -> i32
+ emitc.yield %add : i32
+ }
+ }
+
+ return
+}
+
+// -----
+
+func.func @test_do(%arg0 : !emitc.ptr<i32>) {
+ %1 = emitc.literal "1" : i32
+ %2 = emitc.literal "2" : i32
+
+ // expected-error @+1 {{'emitc.do' op expected condition region to return 1 value, but it returns 0 values}}
+ emitc.do {
+ emitc.verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ } while {
+ %r = emitc.expression %1, %2 : (i32, i32) -> i1 {
+ %cmp = emitc.cmp eq, %1, %2 : (i32, i32) -> i1
+ emitc.yield %cmp : i1
+ }
+
+ emitc.yield
+ }
+
+ return
+}
+
+// -----
+
+func.func @test_do(%arg0 : !emitc.ptr<i32>) {
+ %1 = emitc.literal "1" : i32
+ %2 = emitc.literal "2" : i32
+
+ %true = emitc.literal "true" : i1
+
+ // expected-error @+1 {{'emitc.yield' must return result of 'emitc.expression' from this condition region}}
+ emitc.do {
+ emitc.verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ } while {
+ %r = emitc.expression %1, %2 : (i32, i32) -> i1 {
+ %cmp = emitc.cmp eq, %1, %2 : (i32, i32) -> i1
+ emitc.yield %cmp : i1
+ }
+
+ emitc.yield %true: i1
+ }
+
+ return
+}
+
+// -----
+
+func.func @test_do(%arg0 : !emitc.ptr<i32>) {
+ %1 = emitc.literal "1" : i32
+ %2 = emitc.literal "2" : i32
+
+ // expected-error @+1 {{'emitc.do' op body region must not contain terminator}}
+ emitc.do {
+ emitc.verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ emitc.yield
+ } while {
+ %r = emitc.expression %1, %2 : (i32, i32) -> i1 {
+ %cmp = emitc.cmp eq, %1, %2 : (i32, i32) -> i1
+ emitc.yield %cmp : i1
+ }
+
+ emitc.yield %r: i1
+ }
+
+ return
+}
diff --git a/mlir/test/Dialect/EmitC/ops.mlir b/mlir/test/Dialect/EmitC/ops.mlir
index 84c9b65..1259748 100644
--- a/mlir/test/Dialect/EmitC/ops.mlir
+++ b/mlir/test/Dialect/EmitC/ops.mlir
@@ -335,3 +335,23 @@ emitc.class final @finalClass {
return
}
}
+
+func.func @do(%arg0 : !emitc.ptr<i32>) {
+ %1 = emitc.literal "1" : i32
+ %2 = emitc.literal "2" : i32
+ %3 = emitc.literal "3" : i32
+
+ emitc.do {
+ emitc.verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ } while {
+ %r = emitc.expression %1, %2, %3 : (i32, i32, i32) -> i1 {
+ %add = emitc.add %1, %2 : (i32, i32) -> i32
+ %cmp = emitc.cmp eq, %add, %3 : (i32, i32) -> i1
+ emitc.yield %cmp : i1
+ }
+
+ emitc.yield %r : i1
+ }
+
+ return
+}
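
For orientation, the valid form above corresponds roughly to the following C once run through the Cpp emitter (a sketch with invented names; mlir/test/Target/Cpp/do.mlir in the diffstat checks the real output):

#include <stdint.h>
#include <stdio.h>

void do_loop(int32_t *p) {
  int32_t v1 = 1, v2 = 2, v3 = 3;
  do {
    printf("%d", *p);        // body region: the emitc.verbatim op
  } while (v1 + v2 == v3);   // condition region: the emitc.expression, inlined
}

The condition region's required emitc.expression + emitc.yield pair is what lets the emitter inline the condition directly into the while clause.
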
diff --git a/mlir/test/Dialect/Linalg/decompose-pack.mlir b/mlir/test/Dialect/Linalg/decompose-pack.mlir
index 17e6c29..18a09f4 100644
--- a/mlir/test/Dialect/Linalg/decompose-pack.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-pack.mlir
@@ -274,3 +274,24 @@ func.func @pack_with_adjacent_trailing_dimensions_inner_dims_pos_and_unit_outer(
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0, 0] [1, 1, 1, 4, 1] [1, 1, 1, 1, 1] : tensor<1x4x1xf32> into tensor<1x1x1x4x1xf32>
// CHECK: return %[[INSERT]]
+
+// -----
+
+// The following example shows a pack operation where the inner dims
+// positions are non-adjacent and non-permuted.
+func.func @pack_with_non_adjacent_and_non_permuted_inner_dims(%arg0: tensor<8x1x1x1xf32>, %arg1:tensor<1x1x1x1x8x1xf32>) -> tensor<1x1x1x1x8x1xf32> {
+ %pack = linalg.pack %arg0 outer_dims_perm = [0, 1, 2, 3] inner_dims_pos = [0, 3] inner_tiles = [8, 1] into %arg1: tensor<8x1x1x1xf32> -> tensor<1x1x1x1x8x1xf32>
+ return %pack : tensor<1x1x1x1x8x1xf32>
+}
+
+// CHECK-LABEL: func.func @pack_with_non_adjacent_and_non_permuted_inner_dims
+// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
+// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
+// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x1x8x1xf32>
+// CHECK: %[[TRANSP:.+]] = linalg.transpose
+// CHECK-SAME: ins(%[[SRC]] : tensor<8x1x1x1xf32>)
+// CHECK-SAME: outs(%[[EMPTY]] : tensor<1x1x8x1xf32>)
+// CHECK-SAME: permutation = [1, 2, 0, 3]
+// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
+// CHECK-SAME: [0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 8, 1] [1, 1, 1, 1, 1, 1] : tensor<1x1x8x1xf32> into tensor<1x1x1x1x8x1xf32>
+// CHECK: return %[[INSERT]]
diff --git a/mlir/test/Dialect/Linalg/runtime-verification.mlir b/mlir/test/Dialect/Linalg/runtime-verification.mlir
index 07e96c8..287f0e0 100644
--- a/mlir/test/Dialect/Linalg/runtime-verification.mlir
+++ b/mlir/test/Dialect/Linalg/runtime-verification.mlir
@@ -12,7 +12,9 @@ func.func @static_dims(%arg0: tensor<5xf32>, %arg1: tensor<5xf32>) -> (tensor<5x
// CHECK: cf.assert %[[TRUE]]
// VERBOSE0: %[[TRUE:.*]] = index.bool.constant true
// VERBOSE0: cf.assert %[[TRUE]]
- // VERBOSE0-SAME: ERROR: Runtime op verification failed\0A^\0ALocation: loc(
+ // VERBOSE0-SAME: ERROR: Runtime op verification failed\0A^ unexpected negative result on dimension #0
+ // VERBOSE0-SAME: Location
+ // VERBOSE0-SAME: 19:10
%result = tensor.empty() : tensor<5xf32>
%0 = linalg.generic {
indexing_maps = [#identity, #identity, #identity],
diff --git a/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir b/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir
index c7b0bd5..8465e55 100644
--- a/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir
+++ b/mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir
@@ -127,3 +127,119 @@ func.func @conv2d(%arg0: tensor<2x11x11x5xf32>, %arg1: tensor<2x3x3x5xf32>, %arg
// CHECK-NEXT: %[[EXTRACTED_SLICE:.*]] = tensor.extract_slice %[[S6]][0, 0, 0, 0] [2, 9, 9, 2] [1, 1, 1, 1] : tensor<2x12x12x2xf32> to tensor<2x9x9x2xf32>
// CHECK-NEXT: return %[[EXTRACTED_SLICE]] : tensor<2x9x9x2xf32>
// CHECK-NEXT: }
+
+// -----
+
+func.func @conv2d_type_promotion(%arg0: tensor<2x6x6x5xf16>, %arg1: tensor<2x3x3x5xf16>, %arg2: tensor<1xf32>, %arg3: tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %0 = tensor.empty() : tensor<6x6x5x2xf16>
+ %1 = linalg.winograd_filter_transform fmr(F_4_3) ins(%arg1 : tensor<2x3x3x5xf16>) outs(%0 : tensor<6x6x5x2xf16>) -> tensor<6x6x5x2xf16> // no-crash
+ %2 = tensor.empty() : tensor<6x6x1x1x2x5xf16>
+ %3 = linalg.winograd_input_transform fmr(F_4_3) ins(%arg0 : tensor<2x6x6x5xf16>) outs(%2 : tensor<6x6x1x1x2x5xf16>) -> tensor<6x6x1x1x2x5xf16> // no-crash
+ %collapsed = tensor.collapse_shape %1 [[0, 1], [2], [3]] : tensor<6x6x5x2xf16> into tensor<36x5x2xf16>
+ %collapsed_0 = tensor.collapse_shape %3 [[0, 1], [2, 3, 4], [5]] : tensor<6x6x1x1x2x5xf16> into tensor<36x2x5xf16>
+ %4 = tensor.empty() : tensor<36x2x2xf32>
+ %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<36x2x2xf32>) -> tensor<36x2x2xf32>
+ %6 = linalg.batch_matmul ins(%collapsed_0, %collapsed : tensor<36x2x5xf16>, tensor<36x5x2xf16>) outs(%5 : tensor<36x2x2xf32>) -> tensor<36x2x2xf32>
+ %expanded = tensor.expand_shape %6 [[0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32>
+ %7 = linalg.winograd_output_transform fmr(F_4_3) ins(%expanded : tensor<6x6x1x1x2x2xf32>) outs(%arg3 : tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32>
+ return %7 : tensor<2x4x4x2xf32>
+}
+
+
+// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0) -> (d0 * 4)>
+// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0, d1) -> ()>
+// CHECK: #[[$ATTR_2:.+]] = affine_map<(d0, d1) -> (d0, d1)>
+// CHECK-LABEL: func.func @conv2d_type_promotion(
+// CHECK-SAME: %[[ARG0:.*]]: tensor<2x6x6x5xf16>,
+// CHECK-SAME: %[[ARG1:.*]]: tensor<2x3x3x5xf16>,
+// CHECK-SAME: %[[ARG2:.*]]: tensor<1xf32>,
+// CHECK-SAME: %[[ARG3:.*]]: tensor<2x4x4x2xf32>) -> tensor<2x4x4x2xf32> {
+// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 1.024000e+03 : f32
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant dense<{{\[\[}}1.250000e-01, 0.000000e+00, 0.000000e+00, 0.000000e+00], [2.500000e-01, -2.500000e-01, 2.500000e-01, -2.500000e-01], [2.500000e-01, 2.500000e-01, 2.500000e-01, 2.500000e-01], [1.250000e-01, -2.500000e-01, 5.000000e-01, -1.000000e+00], [1.250000e-01, 2.500000e-01, 5.000000e-01, 1.000000e+00], [0.000000e+00, 0.000000e+00, 0.000000e+00, 5.000000e-01]]> : tensor<6x4xf32>
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant dense<{{\[\[}}1.250000e-01, 2.500000e-01, 2.500000e-01, 1.250000e-01, 1.250000e-01, 0.000000e+00], [0.000000e+00, -2.500000e-01, 2.500000e-01, -2.500000e-01, 2.500000e-01, 0.000000e+00], [0.000000e+00, 2.500000e-01, 2.500000e-01, 5.000000e-01, 5.000000e-01, 0.000000e+00], [0.000000e+00, -2.500000e-01, 2.500000e-01, -1.000000e+00, 1.000000e+00, 5.000000e-01]]> : tensor<4x6xf32>
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant dense<{{\[\[}}2.500000e-01, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00], [0.000000e+00, 2.500000e-01, -2.500000e-01, 2.500000e-01, -2.500000e-01, 2.500000e-01], [-3.125000e-01, -2.500000e-01, -2.500000e-01, -1.250000e-01, -1.250000e-01, 0.000000e+00], [0.000000e+00, -6.250000e-02, 6.250000e-02, -2.500000e-01, 2.500000e-01, -3.125000e-01], [6.250000e-02, 6.250000e-02, 6.250000e-02, 1.250000e-01, 1.250000e-01, 0.000000e+00], [0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 0.000000e+00, 6.250000e-02]]> : tensor<6x6xf16>
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant dense<{{\[\[}}2.500000e-01, 0.000000e+00, -3.125000e-01, 0.000000e+00, 6.250000e-02, 0.000000e+00], [0.000000e+00, 2.500000e-01, -2.500000e-01, -6.250000e-02, 6.250000e-02, 0.000000e+00], [0.000000e+00, -2.500000e-01, -2.500000e-01, 6.250000e-02, 6.250000e-02, 0.000000e+00], [0.000000e+00, 2.500000e-01, -1.250000e-01, -2.500000e-01, 1.250000e-01, 0.000000e+00], [0.000000e+00, -2.500000e-01, -1.250000e-01, 2.500000e-01, 1.250000e-01, 0.000000e+00], [0.000000e+00, 2.500000e-01, 0.000000e+00, -3.125000e-01, 0.000000e+00, 6.250000e-02]]> : tensor<6x6xf16>
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant dense<{{\[\[}}1.000000e+00, -3.332520e-01, -3.332520e-01, 8.331300e-02, 8.331300e-02, 0.000000e+00], [0.000000e+00, 3.332520e-01, -3.332520e-01, -1.666260e-01, 1.666260e-01, 0.000000e+00], [0.000000e+00, -3.332520e-01, -3.332520e-01, 3.332520e-01, 3.332520e-01, 1.000000e+00]]> : tensor<3x6xf16>
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant dense<{{\[\[}}1.000000e+00, 0.000000e+00, 0.000000e+00], [-3.332520e-01, 3.332520e-01, -3.332520e-01], [-3.332520e-01, -3.332520e-01, -3.332520e-01], [8.331300e-02, -1.666260e-01, 3.332520e-01], [8.331300e-02, 1.666260e-01, 3.332520e-01], [0.000000e+00, 0.000000e+00, 1.000000e+00]]> : tensor<6x3xf16>
+// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f16
+// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 5 : index
+// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_12:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VAL_13:.*]] = tensor.empty() : tensor<6x6x5x2xf16>
+// CHECK-NEXT: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_8]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (tensor<6x6x5x2xf16>) {
+// CHECK-NEXT: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_11]] to %[[VAL_9]] step %[[VAL_8]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (tensor<6x6x5x2xf16>) {
+// CHECK-NEXT: %[[VAL_20:.*]] = tensor.extract_slice %[[ARG1]]{{\[}}%[[VAL_15]], %[[VAL_11]], %[[VAL_11]], %[[VAL_18]]] [1, 3, 3, 1] [1, 1, 1, 1] : tensor<2x3x3x5xf16> to tensor<3x3xf16>
+// CHECK-NEXT: %[[VAL_21:.*]] = tensor.empty() : tensor<6x3xf16>
+// CHECK-NEXT: %[[VAL_22:.*]] = linalg.fill ins(%[[VAL_7]] : f16) outs(%[[VAL_21]] : tensor<6x3xf16>) -> tensor<6x3xf16>
+// CHECK-NEXT: %[[VAL_23:.*]] = linalg.matmul ins(%[[VAL_6]], %[[VAL_20]] : tensor<6x3xf16>, tensor<3x3xf16>) outs(%[[VAL_22]] : tensor<6x3xf16>) -> tensor<6x3xf16>
+// CHECK-NEXT: %[[VAL_24:.*]] = tensor.empty() : tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_25:.*]] = linalg.fill ins(%[[VAL_7]] : f16) outs(%[[VAL_24]] : tensor<6x6xf16>) -> tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_26:.*]] = linalg.matmul ins(%[[VAL_23]], %[[VAL_5]] : tensor<6x3xf16>, tensor<3x6xf16>) outs(%[[VAL_25]] : tensor<6x6xf16>) -> tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_27:.*]] = tensor.insert_slice %[[VAL_26]] into %[[VAL_19]]{{\[}}%[[VAL_11]], %[[VAL_11]], %[[VAL_18]], %[[VAL_15]]] [6, 6, 1, 1] [1, 1, 1, 1] : tensor<6x6xf16> into tensor<6x6x5x2xf16>
+// CHECK-NEXT: scf.yield %[[VAL_27]] : tensor<6x6x5x2xf16>
+// CHECK-NEXT: }
+// CHECK-NEXT: scf.yield %[[VAL_17]] : tensor<6x6x5x2xf16>
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[VAL_28:.*]] = tensor.empty() : tensor<6x6x1x1x2x5xf16>
+// CHECK-NEXT: %[[VAL_29:.*]] = scf.for %[[VAL_30:.*]] = %[[VAL_11]] to %[[VAL_8]] step %[[VAL_8]] iter_args(%[[VAL_31:.*]] = %[[VAL_28]]) -> (tensor<6x6x1x1x2x5xf16>) {
+// CHECK-NEXT: %[[VAL_32:.*]] = scf.for %[[VAL_33:.*]] = %[[VAL_11]] to %[[VAL_8]] step %[[VAL_8]] iter_args(%[[VAL_34:.*]] = %[[VAL_31]]) -> (tensor<6x6x1x1x2x5xf16>) {
+// CHECK-NEXT: %[[VAL_35:.*]] = scf.for %[[VAL_36:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_8]] iter_args(%[[VAL_37:.*]] = %[[VAL_34]]) -> (tensor<6x6x1x1x2x5xf16>) {
+// CHECK-NEXT: %[[VAL_38:.*]] = scf.for %[[VAL_39:.*]] = %[[VAL_11]] to %[[VAL_9]] step %[[VAL_8]] iter_args(%[[VAL_40:.*]] = %[[VAL_37]]) -> (tensor<6x6x1x1x2x5xf16>) {
+// CHECK-NEXT: %[[VAL_41:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_30]])
+// CHECK-NEXT: %[[VAL_42:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_33]])
+// CHECK-NEXT: %[[VAL_43:.*]] = tensor.extract_slice %[[ARG0]]{{\[}}%[[VAL_36]], %[[VAL_41]], %[[VAL_42]], %[[VAL_39]]] [1, 6, 6, 1] [1, 1, 1, 1] : tensor<2x6x6x5xf16> to tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_44:.*]] = tensor.empty() : tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_45:.*]] = linalg.fill ins(%[[VAL_7]] : f16) outs(%[[VAL_44]] : tensor<6x6xf16>) -> tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_46:.*]] = linalg.matmul ins(%[[VAL_4]], %[[VAL_43]] : tensor<6x6xf16>, tensor<6x6xf16>) outs(%[[VAL_45]] : tensor<6x6xf16>) -> tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_47:.*]] = tensor.empty() : tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_48:.*]] = linalg.fill ins(%[[VAL_7]] : f16) outs(%[[VAL_47]] : tensor<6x6xf16>) -> tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_49:.*]] = linalg.matmul ins(%[[VAL_46]], %[[VAL_3]] : tensor<6x6xf16>, tensor<6x6xf16>) outs(%[[VAL_48]] : tensor<6x6xf16>) -> tensor<6x6xf16>
+// CHECK-NEXT: %[[VAL_50:.*]] = tensor.insert_slice %[[VAL_49]] into %[[VAL_40]][0, 0, %[[VAL_30]], %[[VAL_33]], %[[VAL_36]], %[[VAL_39]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] : tensor<6x6xf16> into tensor<6x6x1x1x2x5xf16>
+// CHECK-NEXT: scf.yield %[[VAL_50]] : tensor<6x6x1x1x2x5xf16>
+// CHECK-NEXT: }
+// CHECK-NEXT: scf.yield %[[VAL_38]] : tensor<6x6x1x1x2x5xf16>
+// CHECK-NEXT: }
+// CHECK-NEXT: scf.yield %[[VAL_35]] : tensor<6x6x1x1x2x5xf16>
+// CHECK-NEXT: }
+// CHECK-NEXT: scf.yield %[[VAL_32]] : tensor<6x6x1x1x2x5xf16>
+// CHECK-NEXT: }
+// CHECK-NEXT: %[[VAL_51:.*]] = tensor.collapse_shape %[[VAL_14]] {{\[\[}}0, 1], [2], [3]] : tensor<6x6x5x2xf16> into tensor<36x5x2xf16>
+// CHECK-NEXT: %[[VAL_52:.*]] = tensor.collapse_shape %[[VAL_29]] {{\[\[}}0, 1], [2, 3, 4], [5]] : tensor<6x6x1x1x2x5xf16> into tensor<36x2x5xf16>
+// CHECK-NEXT: %[[VAL_53:.*]] = tensor.empty() : tensor<36x2x2xf32>
+// CHECK-NEXT: %[[VAL_54:.*]] = linalg.fill ins(%[[VAL_12]] : f32) outs(%[[VAL_53]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32>
+// CHECK-NEXT: %[[VAL_55:.*]] = linalg.batch_matmul ins(%[[VAL_52]], %[[VAL_51]] : tensor<36x2x5xf16>, tensor<36x5x2xf16>) outs(%[[VAL_54]] : tensor<36x2x2xf32>) -> tensor<36x2x2xf32>
+// CHECK-NEXT: %[[VAL_56:.*]] = tensor.expand_shape %[[VAL_55]] {{\[\[}}0, 1], [2, 3, 4], [5]] output_shape [6, 6, 1, 1, 2, 2] : tensor<36x2x2xf32> into tensor<6x6x1x1x2x2xf32>
+// CHECK-NEXT: %[[VAL_57:.*]] = scf.for %[[VAL_58:.*]] = %[[VAL_11]] to %[[VAL_8]] step %[[VAL_8]] iter_args(%[[VAL_59:.*]] = %[[ARG3]]) -> (tensor<2x4x4x2xf32>) {
+// CHECK-NEXT: %[[VAL_60:.*]] = scf.for %[[VAL_61:.*]] = %[[VAL_11]] to %[[VAL_8]] step %[[VAL_8]] iter_args(%[[VAL_62:.*]] = %[[VAL_59]]) -> (tensor<2x4x4x2xf32>) {
+// CHECK-NEXT: %[[VAL_63:.*]] = scf.for %[[VAL_64:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_8]] iter_args(%[[VAL_65:.*]] = %[[VAL_62]]) -> (tensor<2x4x4x2xf32>) {
+// CHECK-NEXT: %[[VAL_66:.*]] = scf.for %[[VAL_67:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_8]] iter_args(%[[VAL_68:.*]] = %[[VAL_65]]) -> (tensor<2x4x4x2xf32>) {
+// CHECK-NEXT: %[[VAL_69:.*]] = tensor.extract_slice %[[VAL_56]][0, 0, %[[VAL_58]], %[[VAL_61]], %[[VAL_64]], %[[VAL_67]]] [6, 6, 1, 1, 1, 1] [1, 1, 1, 1, 1, 1] : tensor<6x6x1x1x2x2xf32> to tensor<6x6xf32>
+// CHECK-NEXT: %[[VAL_70:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_58]])
+// CHECK-NEXT: %[[VAL_71:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_61]])
+// CHECK-NEXT: %[[VAL_72:.*]] = tensor.extract_slice %[[VAL_68]]{{\[}}%[[VAL_64]], %[[VAL_70]], %[[VAL_71]], %[[VAL_67]]] [1, 4, 4, 1] [1, 1, 1, 1] : tensor<2x4x4x2xf32> to tensor<4x4xf32>
+// CHECK-NEXT: %[[VAL_73:.*]] = tensor.empty() : tensor<4x6xf32>
+// CHECK-NEXT: %[[VAL_74:.*]] = linalg.fill ins(%[[VAL_12]] : f32) outs(%[[VAL_73]] : tensor<4x6xf32>) -> tensor<4x6xf32>
+// CHECK-NEXT: %[[VAL_75:.*]] = linalg.matmul ins(%[[VAL_2]], %[[VAL_69]] : tensor<4x6xf32>, tensor<6x6xf32>) outs(%[[VAL_74]] : tensor<4x6xf32>) -> tensor<4x6xf32>
+// CHECK-NEXT: %[[VAL_76:.*]] = tensor.empty() : tensor<4x4xf32>
+// CHECK-NEXT: %[[VAL_77:.*]] = linalg.fill ins(%[[VAL_12]] : f32) outs(%[[VAL_76]] : tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-NEXT: %[[VAL_78:.*]] = linalg.matmul ins(%[[VAL_75]], %[[VAL_1]] : tensor<4x6xf32>, tensor<6x4xf32>) outs(%[[VAL_77]] : tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-NEXT: %[[VAL_79:.*]] = linalg.generic {indexing_maps = [#[[$ATTR_1]], #[[$ATTR_2]], #[[$ATTR_2]]], iterator_types = ["parallel", "parallel"]} ins(%[[VAL_0]], %[[VAL_78]] : f32, tensor<4x4xf32>) outs(%[[VAL_72]] : tensor<4x4xf32>) {
+// CHECK-NEXT: ^bb0(%[[VAL_80:.*]]: f32, %[[VAL_81:.*]]: f32, %[[VAL_82:.*]]: f32):
+// CHECK-NEXT: %[[VAL_83:.*]] = arith.mulf %[[VAL_80]], %[[VAL_81]] : f32
+// CHECK-NEXT: %[[VAL_84:.*]] = arith.addf %[[VAL_83]], %[[VAL_82]] : f32
+// CHECK-NEXT: linalg.yield %[[VAL_84]] : f32
+// CHECK-NEXT: } -> tensor<4x4xf32>
+// CHECK-NEXT: %[[VAL_85:.*]] = tensor.insert_slice %[[VAL_79]] into %[[VAL_68]]{{\[}}%[[VAL_64]], %[[VAL_70]], %[[VAL_71]], %[[VAL_67]]] [1, 4, 4, 1] [1, 1, 1, 1] : tensor<4x4xf32> into tensor<2x4x4x2xf32>
+// CHECK-NEXT: scf.yield %[[VAL_85]] : tensor<2x4x4x2xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: scf.yield %[[VAL_66]] : tensor<2x4x4x2xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: scf.yield %[[VAL_63]] : tensor<2x4x4x2xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: scf.yield %[[VAL_60]] : tensor<2x4x4x2xf32>
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[VAL_57]] : tensor<2x4x4x2xf32>
+// CHECK-NEXT: }
\ No newline at end of file
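
The constant matrices checked above are the F(4,3) transforms of the standard 2D Winograd identity; in the usual notation (a summary for orientation, not text from the test):

$$ Y = A^{\mathsf{T}}\,\bigl[(G\,g\,G^{\mathsf{T}}) \odot (B^{\mathsf{T}}\,d\,B)\bigr]\,A $$

where g is the 3x3 filter tile, d the 6x6 input tile, \odot elementwise multiplication, and G (6x3), B (6x6), A (6x4) the filter, input, and output transforms. The matrices here carry extra scaling (entries like 2.5e-01 and 6.25e-02), which the scalar 1.024e+03 = 1024 fed into the trailing linalg.generic multiplies back out.
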
diff --git a/mlir/test/Dialect/OpenACC/pointer-like-interface-alloc.mlir b/mlir/test/Dialect/OpenACC/pointer-like-interface-alloc.mlir
new file mode 100644
index 0000000..603ace8
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/pointer-like-interface-alloc.mlir
@@ -0,0 +1,24 @@
+// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=alloc}))" 2>&1 | FileCheck %s
+
+func.func @test_static_memref_alloc() {
+ %0 = memref.alloca() {test.ptr} : memref<10x20xf32>
+ // CHECK: Successfully generated alloc for operation: %[[ORIG:.*]] = memref.alloca() {test.ptr} : memref<10x20xf32>
+ // CHECK: Generated: %{{.*}} = memref.alloca() : memref<10x20xf32>
+ return
+}
+
+// -----
+
+func.func @test_dynamic_memref_alloc() {
+ %c10 = arith.constant 10 : index
+ %c20 = arith.constant 20 : index
+ %orig = memref.alloc(%c10, %c20) {test.ptr} : memref<?x?xf32>
+
+ // CHECK: Successfully generated alloc for operation: %[[ORIG:.*]] = memref.alloc(%[[C10:.*]], %[[C20:.*]]) {test.ptr} : memref<?x?xf32>
+ // CHECK: Generated: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: Generated: %[[DIM0:.*]] = memref.dim %[[ORIG]], %[[C0]] : memref<?x?xf32>
+ // CHECK: Generated: %[[C1:.*]] = arith.constant 1 : index
+ // CHECK: Generated: %[[DIM1:.*]] = memref.dim %[[ORIG]], %[[C1]] : memref<?x?xf32>
+ // CHECK: Generated: %{{.*}} = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+ return
+}
diff --git a/mlir/test/Dialect/OpenACC/pointer-like-interface-copy.mlir b/mlir/test/Dialect/OpenACC/pointer-like-interface-copy.mlir
new file mode 100644
index 0000000..9220d84
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/pointer-like-interface-copy.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=copy}))" 2>&1 | FileCheck %s
+
+func.func @test_copy_static() {
+ %src = memref.alloca() {test.src_ptr} : memref<10x20xf32>
+ %dest = memref.alloca() {test.dest_ptr} : memref<10x20xf32>
+
+ // CHECK: Successfully generated copy from source: %[[SRC:.*]] = memref.alloca() {test.src_ptr} : memref<10x20xf32> to destination: %[[DEST:.*]] = memref.alloca() {test.dest_ptr} : memref<10x20xf32>
+ // CHECK: Generated: memref.copy %[[SRC]], %[[DEST]] : memref<10x20xf32> to memref<10x20xf32>
+ return
+}
+
+// -----
+
+func.func @test_copy_dynamic() {
+ %c10 = arith.constant 10 : index
+ %c20 = arith.constant 20 : index
+ %src = memref.alloc(%c10, %c20) {test.src_ptr} : memref<?x?xf32>
+ %dest = memref.alloc(%c10, %c20) {test.dest_ptr} : memref<?x?xf32>
+
+ // CHECK: Successfully generated copy from source: %[[SRC:.*]] = memref.alloc(%[[C10:.*]], %[[C20:.*]]) {test.src_ptr} : memref<?x?xf32> to destination: %[[DEST:.*]] = memref.alloc(%[[C10]], %[[C20]]) {test.dest_ptr} : memref<?x?xf32>
+ // CHECK: Generated: memref.copy %[[SRC]], %[[DEST]] : memref<?x?xf32> to memref<?x?xf32>
+ return
+}
diff --git a/mlir/test/Dialect/OpenACC/pointer-like-interface-free.mlir b/mlir/test/Dialect/OpenACC/pointer-like-interface-free.mlir
new file mode 100644
index 0000000..ecf4f75
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/pointer-like-interface-free.mlir
@@ -0,0 +1,31 @@
+// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(func.func(test-acc-pointer-like-interface{test-mode=free}))" 2>&1 | FileCheck %s
+
+func.func @test_static_memref_free() {
+ %0 = memref.alloca() {test.ptr} : memref<10x20xf32>
+ // CHECK: Successfully generated free for operation: %[[ORIG:.*]] = memref.alloca() {test.ptr} : memref<10x20xf32>
+ // CHECK-NOT: Generated
+ return
+}
+
+// -----
+
+func.func @test_dynamic_memref_free() {
+ %c10 = arith.constant 10 : index
+ %c20 = arith.constant 20 : index
+ %orig = memref.alloc(%c10, %c20) {test.ptr} : memref<?x?xf32>
+
+ // CHECK: Successfully generated free for operation: %[[ORIG:.*]] = memref.alloc(%[[C10:.*]], %[[C20:.*]]) {test.ptr} : memref<?x?xf32>
+ // CHECK: Generated: memref.dealloc %[[ORIG]] : memref<?x?xf32>
+ return
+}
+
+// -----
+
+func.func @test_cast_walking_free() {
+ %0 = memref.alloca() : memref<10x20xf32>
+ %1 = memref.cast %0 {test.ptr} : memref<10x20xf32> to memref<?x?xf32>
+
+ // CHECK: Successfully generated free for operation: %[[CAST:.*]] = memref.cast %[[ALLOCA:.*]] {test.ptr} : memref<10x20xf32> to memref<?x?xf32>
+ // CHECK-NOT: Generated
+ return
+}
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index c07edac..eb369c0 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -3322,6 +3322,46 @@ func.func @from_elements_to_elements_shuffle(%a: vector<4x2xf32>) -> vector<4x2x
// -----
+// CHECK-LABEL: func @to_elements_of_scalar_broadcast_folds
+// CHECK-SAME: (%[[S:.*]]: f32) -> (f32, f32, f32, f32)
+func.func @to_elements_of_scalar_broadcast_folds(%s: f32) -> (f32, f32, f32, f32) {
+ %v = vector.broadcast %s : f32 to vector<4xf32>
+ %e:4 = vector.to_elements %v : vector<4xf32>
+ // CHECK-NOT: vector.broadcast
+ // CHECK-NOT: vector.to_elements
+ // CHECK: return %[[S]], %[[S]], %[[S]], %[[S]]
+ return %e#0, %e#1, %e#2, %e#3 : f32, f32, f32, f32
+}
+
+// -----
+
+// CHECK-LABEL: func @to_elements_of_vector_broadcast
+// CHECK-SAME: (%[[VEC:.*]]: vector<2xf32>) -> (f32, f32, f32, f32, f32, f32)
+func.func @to_elements_of_vector_broadcast(%vec: vector<2xf32>) -> (f32, f32, f32, f32, f32, f32) {
+ %v = vector.broadcast %vec : vector<2xf32> to vector<3x2xf32>
+ %e:6 = vector.to_elements %v : vector<3x2xf32>
+ // CHECK-NOT: vector.broadcast
+ // CHECK: %[[SRC_ELEMS:.*]]:2 = vector.to_elements %[[VEC]]
+ // CHECK: return %[[SRC_ELEMS]]#0, %[[SRC_ELEMS]]#1, %[[SRC_ELEMS]]#0, %[[SRC_ELEMS]]#1, %[[SRC_ELEMS]]#0, %[[SRC_ELEMS]]#1
+ return %e#0, %e#1, %e#2, %e#3, %e#4, %e#5 : f32, f32, f32, f32, f32, f32
+}
+
+// -----
+
+// CHECK-LABEL: func @to_elements_of_vector_broadcast_inner_dim
+// CHECK-SAME: (%[[V:.*]]: vector<2x1x2xf32>) -> (f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)
+func.func @to_elements_of_vector_broadcast_inner_dim(%v: vector<2x1x2xf32>) -> (f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) {
+ %b = vector.broadcast %v : vector<2x1x2xf32> to vector<2x3x2xf32>
+ %e:12 = vector.to_elements %b : vector<2x3x2xf32>
+ // CHECK-NOT: vector.broadcast
+ // CHECK: %[[SRC:.*]]:4 = vector.to_elements %[[V]] : vector<2x1x2xf32>
+ // CHECK: return %[[SRC]]#0, %[[SRC]]#1, %[[SRC]]#0, %[[SRC]]#1, %[[SRC]]#0, %[[SRC]]#1, %[[SRC]]#2, %[[SRC]]#3, %[[SRC]]#2, %[[SRC]]#3, %[[SRC]]#2, %[[SRC]]#3
+ return %e#0, %e#1, %e#2, %e#3, %e#4, %e#5, %e#6, %e#7, %e#8, %e#9, %e#10, %e#11 :
+ f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32
+}
+
+// -----
+
// +---------------------------------------------------------------------------
// Tests for foldFromElementsToConstant
// +---------------------------------------------------------------------------
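
All three folds above apply the same index rule: each result of to_elements(broadcast(v)) is an element of v, found by clamping every broadcast dimension (size 1, or a leading dimension v lacks) to zero. A small hand-written C model of that mapping, checked against the 2x1x2 -> 2x3x2 case above:

#include <stdio.h>

// Map a result coordinate to the linearized source element index.
// Size-1 source dims are broadcast (stretched), so their coordinate is
// clamped to 0; leading result dims the source lacks are simply dropped.
static int sourceIndex(const int *resCoord, const int *srcShape,
                       int resRank, int srcRank) {
  int idx = 0;
  for (int d = 0; d < srcRank; ++d) {
    int c = resCoord[resRank - srcRank + d]; // align trailing dims
    idx = idx * srcShape[d] + (srcShape[d] == 1 ? 0 : c);
  }
  return idx;
}

int main(void) {
  int srcShape[3] = {2, 1, 2};
  int resCoord[3] = {1, 2, 0}; // result element (1,2,0) of vector<2x3x2xf32>
  printf("%d\n", sourceIndex(resCoord, srcShape, 3, 3)); // prints 2 -> SRC#2
  return 0;
}
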
diff --git a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir
index 01a826a..ae46de1 100644
--- a/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/assume-alignment-runtime-verification.mlir
@@ -38,7 +38,7 @@ func.func @main() {
%buffer = builtin.unrealized_conversion_cast %10 : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<1xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: %[[ASSUME:.*]] = "memref.assume_alignment"(%{{.*}}) <{alignment = 4 : i32}> : (memref<1xf32>)
+ // CHECK-NEXT: %[[ASSUME:.*]] = memref.assume_alignment %{{.*}}, 4 : memref<1xf32>
// CHECK-NEXT: ^ memref is not aligned to 4
// CHECK-NEXT: Location: loc({{.*}})
%assume = memref.assume_alignment %buffer, 4 : memref<1xf32>
diff --git a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir
index 1144a7c..6a7984c 100644
--- a/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/atomic-rmw-runtime-verification.mlir
@@ -41,7 +41,7 @@ func.func @main() {
%cast = memref.cast %buffer : memref<5xf32> to memref<?xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.atomic_rmw"(%{{.*}}, %{{.*}}, %{{.*}}) <{kind = 0 : i64}> : (f32, memref<?xf32>, index) -> f32
+ // CHECK-NEXT: memref.atomic_rmw addf %{{.*}}, %{{.*}} : (f32, memref<?xf32>) -> f32
// CHECK-NEXT: ^ out-of-bounds access
// CHECK-NEXT: Location: loc({{.*}})
%c9 = arith.constant 9 : index
diff --git a/mlir/test/Integration/Dialect/MemRef/cast-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/cast-runtime-verification.mlir
index 1ac1030..b605c77 100644
--- a/mlir/test/Integration/Dialect/MemRef/cast-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/cast-runtime-verification.mlir
@@ -43,26 +43,26 @@ func.func @main() {
%alloc = memref.alloc() : memref<5xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.cast"(%{{.*}}) : (memref<?xf32>) -> memref<10xf32>
+ // CHECK-NEXT: memref.cast %{{.*}} : memref<?xf32> to memref<10xf32>
// CHECK-NEXT: ^ size mismatch of dim 0
// CHECK-NEXT: Location: loc({{.*}})
%1 = memref.cast %alloc : memref<5xf32> to memref<?xf32>
func.call @cast_to_static_dim(%1) : (memref<?xf32>) -> (memref<10xf32>)
// CHECK-NEXT: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.cast"(%{{.*}}) : (memref<*xf32>) -> memref<f32>
+ // CHECK-NEXT: memref.cast %{{.*}} : memref<*xf32> to memref<f32>
// CHECK-NEXT: ^ rank mismatch
// CHECK-NEXT: Location: loc({{.*}})
%3 = memref.cast %alloc : memref<5xf32> to memref<*xf32>
func.call @cast_to_ranked(%3) : (memref<*xf32>) -> (memref<f32>)
// CHECK-NEXT: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.cast"(%{{.*}}) : (memref<?xf32, strided<[?], offset: ?>>) -> memref<?xf32, strided<[9], offset: 5>>
+ // CHECK-NEXT: memref.cast %{{.*}} : memref<?xf32, strided<[?], offset: ?>>
// CHECK-NEXT: ^ offset mismatch
// CHECK-NEXT: Location: loc({{.*}})
// CHECK-NEXT: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.cast"(%{{.*}}) : (memref<?xf32, strided<[?], offset: ?>>) -> memref<?xf32, strided<[9], offset: 5>>
+ // CHECK-NEXT: memref.cast %{{.*}} : memref<?xf32, strided<[?], offset: ?>>
// CHECK-NEXT: ^ stride mismatch of dim 0
// CHECK-NEXT: Location: loc({{.*}})
%4 = memref.cast %alloc
diff --git a/mlir/test/Integration/Dialect/MemRef/copy-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/copy-runtime-verification.mlir
index be9417b..413cb19 100644
--- a/mlir/test/Integration/Dialect/MemRef/copy-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/copy-runtime-verification.mlir
@@ -28,7 +28,7 @@ func.func @main() {
%cast2 = memref.cast %alloca2 : memref<5xf32> to memref<?xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.copy"(%{{.*}}, %{{.*}}) : (memref<?xf32>, memref<?xf32>) -> ()
+ // CHECK-NEXT: memref.copy %{{.*}}, %{{.*}} : memref<?xf32> to memref<?xf32>
// CHECK-NEXT: ^ size of 0-th source/target dim does not match
// CHECK-NEXT: Location: loc({{.*}})
call @memcpy_helper(%cast1, %cast2) : (memref<?xf32>, memref<?xf32>) -> ()
diff --git a/mlir/test/Integration/Dialect/MemRef/dim-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/dim-runtime-verification.mlir
index ef4af62..8f5a2c7 100644
--- a/mlir/test/Integration/Dialect/MemRef/dim-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/dim-runtime-verification.mlir
@@ -20,7 +20,7 @@ func.func @main() {
%alloca = memref.alloca() : memref<1xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.dim"(%{{.*}}, %{{.*}}) : (memref<1xf32>, index) -> index
+ // CHECK-NEXT: memref.dim %{{.*}}, %{{.*}} : memref<1xf32>
// CHECK-NEXT: ^ index is out of bounds
// CHECK-NEXT: Location: loc({{.*}})
%dim = memref.dim %alloca, %c4 : memref<1xf32>
diff --git a/mlir/test/Integration/Dialect/MemRef/load-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/load-runtime-verification.mlir
index 2e42648..364880c 100644
--- a/mlir/test/Integration/Dialect/MemRef/load-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/load-runtime-verification.mlir
@@ -40,19 +40,19 @@ func.func @main() {
%alloc_2x2x2 = memref.alloc(%2, %2, %2) : memref<?x?x?xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.load"(%{{.*}}, %{{.*}}) : (memref<1xf32>, index) -> f32
+ // CHECK-NEXT: memref.load %{{.*}}[%{{.*}}] : memref<1xf32>
// CHECK-NEXT: ^ out-of-bounds access
// CHECK-NEXT: Location: loc({{.*}})
func.call @load(%alloca_1, %1) : (memref<1xf32>, index) -> ()
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.load"(%{{.*}}, %{{.*}}) : (memref<?xf32>, index) -> f32
+ // CHECK-NEXT: memref.load %{{.*}}[%{{.*}}] : memref<?xf32>
// CHECK-NEXT: ^ out-of-bounds access
// CHECK-NEXT: Location: loc({{.*}})
func.call @load_dynamic(%alloc_1, %1) : (memref<?xf32>, index) -> ()
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.load"(%{{.*}}, %{{.*}}) : (memref<?x?x?xf32>, index, index, index) -> f32
+ // CHECK-NEXT: memref.load %{{.*}}[%{{.*}}] : memref<?x?x?xf32>
// CHECK-NEXT: ^ out-of-bounds access
// CHECK-NEXT: Location: loc({{.*}})
func.call @load_nd_dynamic(%alloc_2x2x2, %1, %n1, %0) : (memref<?x?x?xf32>, index, index, index) -> ()
diff --git a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir
index 82e6380..760f2a7 100644
--- a/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/store-runtime-verification.mlir
@@ -41,7 +41,7 @@ func.func @main() {
%cast = memref.cast %buffer : memref<5xf32> to memref<?xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.store"(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, memref<?xf32>, index) -> ()
+ // CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<?xf32>
// CHECK-NEXT: ^ out-of-bounds access
// CHECK-NEXT: Location: loc({{.*}})
%c9 = arith.constant 9 : index
diff --git a/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir b/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir
index 9fbe5bc..71e813c 100644
--- a/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/MemRef/subview-runtime-verification.mlir
@@ -51,47 +51,47 @@ func.func @main() {
// Offset is out-of-bounds and slice runs out-of-bounds
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.subview"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) <{operandSegmentSizes = array<i32: 1, 1, 1, 1>, static_offsets = array<i64: -9223372036854775808, 0>, static_sizes = array<i64: -9223372036854775808, 1>, static_strides = array<i64: -9223372036854775808, 1>}> : (memref<?x4xf32>, index, index, index) -> memref<?xf32, strided<[?], offset: ?>>
+ // CHECK-NEXT: memref.subview %{{.*}}[%{{.*}}, 0] [%{{.*}}, 1] [%{{.*}}, 1] : memref<?x4xf32> to memref<?xf32, strided<[?], offset: ?>>
// CHECK-NEXT: ^ offset 0 is out-of-bounds
// CHECK-NEXT: Location: loc({{.*}})
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.subview"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) <{operandSegmentSizes = array<i32: 1, 1, 1, 1>, static_offsets = array<i64: -9223372036854775808, 0>, static_sizes = array<i64: -9223372036854775808, 1>, static_strides = array<i64: -9223372036854775808, 1>}> : (memref<?x4xf32>, index, index, index) -> memref<?xf32, strided<[?], offset: ?>>
+ // CHECK-NEXT: memref.subview %{{.*}}[%{{.*}}, 0] [%{{.*}}, 1] [%{{.*}}, 1] : memref<?x4xf32> to memref<?xf32, strided<[?], offset: ?>>
// CHECK-NEXT: ^ subview runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @subview_dynamic_rank_reduce(%alloca_4_dyn, %5, %5, %1) : (memref<?x4xf32>, index, index, index) -> ()
// Offset is out-of-bounds and slice runs out-of-bounds
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.subview"(%{{.*}}, %{{.*}}) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 1>, static_strides = array<i64: 1>}> : (memref<1xf32>, index) -> memref<1xf32, strided<[1], offset: ?>>
+ // CHECK-NEXT: memref.subview %{{.*}}[%{{.*}}] [1] [1] : memref<1xf32> to memref<1xf32, strided<[1], offset: ?>>
// CHECK-NEXT: ^ offset 0 is out-of-bounds
// CHECK-NEXT: Location: loc({{.*}})
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.subview"(%{{.*}}, %{{.*}}) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 1>, static_strides = array<i64: 1>}> : (memref<1xf32>, index) -> memref<1xf32, strided<[1], offset: ?>>
+ // CHECK-NEXT: memref.subview %{{.*}}[%{{.*}}] [1] [1] : memref<1xf32> to memref<1xf32, strided<[1], offset: ?>>
// CHECK-NEXT: ^ subview runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @subview(%alloca, %1) : (memref<1xf32>, index) -> ()
// Offset is out-of-bounds and slice runs out-of-bounds
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.subview"(%{{.*}}, %{{.*}}) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 1>, static_strides = array<i64: 1>}> : (memref<1xf32>, index) -> memref<1xf32, strided<[1], offset: ?>>
+ // CHECK-NEXT: memref.subview %{{.*}}[%{{.*}}] [1] [1] : memref<1xf32> to memref<1xf32, strided<[1], offset: ?>>
// CHECK-NEXT: ^ offset 0 is out-of-bounds
// CHECK-NEXT: Location: loc({{.*}})
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.subview"(%{{.*}}, %{{.*}}) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 1>, static_strides = array<i64: 1>}> : (memref<1xf32>, index) -> memref<1xf32, strided<[1], offset: ?>>
+ // CHECK-NEXT: memref.subview %{{.*}}[%{{.*}}] [1] [1] : memref<1xf32> to memref<1xf32, strided<[1], offset: ?>>
// CHECK-NEXT: ^ subview runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @subview(%alloca, %n1) : (memref<1xf32>, index) -> ()
// Slice runs out-of-bounds due to size
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.subview"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) <{operandSegmentSizes = array<i32: 1, 1, 1, 1>, static_offsets = array<i64: -9223372036854775808, 0>, static_sizes = array<i64: -9223372036854775808, 4>, static_strides = array<i64: -9223372036854775808, 1>}> : (memref<?x4xf32>, index, index, index) -> memref<?x4xf32, strided<[?, 1], offset: ?>>
+ // CHECK-NEXT: memref.subview %{{.*}}[%{{.*}}, 0] [%{{.*}}, 4] [%{{.*}}, 1] : memref<?x4xf32> to memref<?x4xf32, strided<[?, 1], offset: ?>>
// CHECK-NEXT: ^ subview runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @subview_dynamic(%alloca_4_dyn, %0, %5, %1) : (memref<?x4xf32>, index, index, index) -> ()
// Slice runs out-of-bounds due to stride
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "memref.subview"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) <{operandSegmentSizes = array<i32: 1, 1, 1, 1>, static_offsets = array<i64: -9223372036854775808, 0>, static_sizes = array<i64: -9223372036854775808, 4>, static_strides = array<i64: -9223372036854775808, 1>}> : (memref<?x4xf32>, index, index, index) -> memref<?x4xf32, strided<[?, 1], offset: ?>>
+ // CHECK-NEXT: memref.subview %{{.*}}[%{{.*}}, 0] [%{{.*}}, 4] [%{{.*}}, 1] : memref<?x4xf32> to memref<?x4xf32, strided<[?, 1], offset: ?>>
// CHECK-NEXT: ^ subview runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @subview_dynamic(%alloca_4_dyn, %0, %4, %4) : (memref<?x4xf32>, index, index, index) -> ()
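In the old generic form above, the -9223372036854775808 entries in static_offsets, static_sizes, and static_strides are ShapedType::kDynamic sentinels (INT64_MIN); the custom form now prints the corresponding SSA operands in their place. A sketch, with illustrative names, of the dynamic subview shape these checks match:

%view = memref.subview %m[%off, 0] [%sz, 1] [%st, 1]
    : memref<?x4xf32> to memref<?xf32, strided<[?], offset: ?>>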
diff --git a/mlir/test/Integration/Dialect/Tensor/cast-runtime-verification.mlir b/mlir/test/Integration/Dialect/Tensor/cast-runtime-verification.mlir
index f37a6d6..a96b2be 100644
--- a/mlir/test/Integration/Dialect/Tensor/cast-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/Tensor/cast-runtime-verification.mlir
@@ -40,14 +40,14 @@ func.func @main() {
%alloc = tensor.empty() : tensor<5xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.cast"(%{{.*}}) : (tensor<?xf32>) -> tensor<10xf32>
+ // CHECK-NEXT: tensor.cast %{{.*}} : tensor<?xf32> to tensor<10xf32>
// CHECK-NEXT: ^ size mismatch of dim 0
// CHECK-NEXT: Location: loc({{.*}})
%1 = tensor.cast %alloc : tensor<5xf32> to tensor<?xf32>
func.call @cast_to_static_dim(%1) : (tensor<?xf32>) -> (tensor<10xf32>)
// CHECK-NEXT: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.cast"(%{{.*}}) : (tensor<*xf32>) -> tensor<f32>
+ // CHECK-NEXT: tensor.cast %{{.*}} : tensor<*xf32> to tensor<f32>
// CHECK-NEXT: ^ rank mismatch
// CHECK-NEXT: Location: loc({{.*}})
%3 = tensor.cast %alloc : tensor<5xf32> to tensor<*xf32>
diff --git a/mlir/test/Integration/Dialect/Tensor/dim-runtime-verification.mlir b/mlir/test/Integration/Dialect/Tensor/dim-runtime-verification.mlir
index e9e5c04..1a26ebe 100644
--- a/mlir/test/Integration/Dialect/Tensor/dim-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/Tensor/dim-runtime-verification.mlir
@@ -22,7 +22,7 @@ func.func @main() {
%tensor = tensor.empty() : tensor<1xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.dim"(%{{.*}}, %{{.*}}) : (tensor<1xf32>, index) -> index
+ // CHECK-NEXT: tensor.dim %{{.*}}, %{{.*}} : tensor<1xf32>
// CHECK-NEXT: ^ index is out of bounds
// CHECK-NEXT: Location: loc({{.*}})
%dim = tensor.dim %tensor, %c4 : tensor<1xf32>
diff --git a/mlir/test/Integration/Dialect/Tensor/extract-runtime-verification.mlir b/mlir/test/Integration/Dialect/Tensor/extract-runtime-verification.mlir
index 73fcec4..cc252a2b 100644
--- a/mlir/test/Integration/Dialect/Tensor/extract-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/Tensor/extract-runtime-verification.mlir
@@ -44,19 +44,19 @@ func.func @main() {
%alloc_2x2x2 = tensor.empty(%2, %2, %2) : tensor<?x?x?xf32>
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract"(%{{.*}}, %{{.*}}) : (tensor<1xf32>, index) -> f32
+ // CHECK-NEXT: tensor.extract %{{.*}}[%{{.*}}] : tensor<1xf32>
// CHECK-NEXT: ^ out-of-bounds access
// CHECK-NEXT: Location: loc({{.*}})
func.call @extract(%alloca_1, %1) : (tensor<1xf32>, index) -> ()
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract"(%{{.*}}, %{{.*}}) : (tensor<?xf32>, index) -> f32
+ // CHECK-NEXT: tensor.extract %{{.*}}[%{{.*}}] : tensor<?xf32>
// CHECK-NEXT: ^ out-of-bounds access
// CHECK-NEXT: Location: loc({{.*}})
func.call @extract_dynamic(%alloc_1, %1) : (tensor<?xf32>, index) -> ()
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract"(%{{.*}}, %{{.*}}) : (tensor<?x?x?xf32>, index, index, index) -> f32
+ // CHECK-NEXT: tensor.extract %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : tensor<?x?x?xf32>
// CHECK-NEXT: ^ out-of-bounds access
// CHECK-NEXT: Location: loc({{.*}})
func.call @extract_nd_dynamic(%alloc_2x2x2, %1, %n1, %0) : (tensor<?x?x?xf32>, index, index, index) -> ()
diff --git a/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir b/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir
index 341a59e..0c7c4a6 100644
--- a/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir
+++ b/mlir/test/Integration/Dialect/Tensor/extract_slice-runtime-verification.mlir
@@ -47,47 +47,47 @@ func.func @main() {
// Offset is out-of-bounds and slice runs out-of-bounds
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract_slice"(%arg0, %arg1, %arg2, %arg3) <{operandSegmentSizes = array<i32: 1, 1, 1, 1>, static_offsets = array<i64: -9223372036854775808, 0>, static_sizes = array<i64: -9223372036854775808, 1>, static_strides = array<i64: -9223372036854775808, 1>}> : (tensor<?x4xf32>, index, index, index) -> tensor<?xf32>
+ // CHECK-NEXT: tensor.extract_slice %{{.*}}[%{{.*}}, 0] [%{{.*}}, 1] [%{{.*}}, 1] : tensor<?x4xf32> to tensor<?xf32>
// CHECK-NEXT: ^ offset 0 is out-of-bounds
// CHECK-NEXT: Location: loc({{.*}})
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract_slice"(%arg0, %arg1, %arg2, %arg3) <{operandSegmentSizes = array<i32: 1, 1, 1, 1>, static_offsets = array<i64: -9223372036854775808, 0>, static_sizes = array<i64: -9223372036854775808, 1>, static_strides = array<i64: -9223372036854775808, 1>}> : (tensor<?x4xf32>, index, index, index) -> tensor<?xf32>
+ // CHECK-NEXT: tensor.extract_slice %{{.*}}[%{{.*}}, 0] [%{{.*}}, 1] [%{{.*}}, 1] : tensor<?x4xf32> to tensor<?xf32>
// CHECK-NEXT: ^ extract_slice runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @extract_slice_dynamic_rank_reduce(%alloca_4_dyn, %5, %5, %1) : (tensor<?x4xf32>, index, index, index) -> ()
// Offset is out-of-bounds and slice runs out-of-bounds
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract_slice"(%arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 1>, static_strides = array<i64: 1>}> : (tensor<1xf32>, index) -> tensor<1xf32>
+ // CHECK-NEXT: tensor.extract_slice %{{.*}}[%{{.*}}] [1] [1] : tensor<1xf32> to tensor<1xf32>
// CHECK-NEXT: ^ offset 0 is out-of-bounds
// CHECK-NEXT: Location: loc({{.*}})
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract_slice"(%arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 1>, static_strides = array<i64: 1>}> : (tensor<1xf32>, index) -> tensor<1xf32>
+ // CHECK-NEXT: tensor.extract_slice %{{.*}}[%{{.*}}] [1] [1] : tensor<1xf32> to tensor<1xf32>
// CHECK-NEXT: ^ extract_slice runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @extract_slice(%alloca, %1) : (tensor<1xf32>, index) -> ()
// Offset is out-of-bounds and slice runs out-of-bounds
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract_slice"(%arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 1>, static_strides = array<i64: 1>}> : (tensor<1xf32>, index) -> tensor<1xf32>
+ // CHECK-NEXT: tensor.extract_slice %{{.*}}[%{{.*}}] [1] [1] : tensor<1xf32> to tensor<1xf32>
// CHECK-NEXT: ^ offset 0 is out-of-bounds
// CHECK-NEXT: Location: loc({{.*}})
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract_slice"(%arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 1, 0, 0>, static_offsets = array<i64: -9223372036854775808>, static_sizes = array<i64: 1>, static_strides = array<i64: 1>}> : (tensor<1xf32>, index) -> tensor<1xf32>
+ // CHECK-NEXT: tensor.extract_slice %{{.*}}[%{{.*}}] [1] [1] : tensor<1xf32> to tensor<1xf32>
// CHECK-NEXT: ^ extract_slice runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @extract_slice(%alloca, %n1) : (tensor<1xf32>, index) -> ()
// Slice runs out-of-bounds due to size
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract_slice"(%arg0, %arg1, %arg2, %arg3) <{operandSegmentSizes = array<i32: 1, 1, 1, 1>, static_offsets = array<i64: -9223372036854775808, 0>, static_sizes = array<i64: -9223372036854775808, 4>, static_strides = array<i64: -9223372036854775808, 1>}> : (tensor<?x4xf32>, index, index, index) -> tensor<?x4xf32>
+ // CHECK-NEXT: tensor.extract_slice %{{.*}}[%{{.*}}, 0] [%{{.*}}, 4] [%{{.*}}, 1] : tensor<?x4xf32> to tensor<?x4xf32>
// CHECK-NEXT: ^ extract_slice runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @extract_slice_dynamic(%alloca_4_dyn, %0, %5, %1) : (tensor<?x4xf32>, index, index, index) -> ()
// Slice runs out-of-bounds due to stride
// CHECK: ERROR: Runtime op verification failed
- // CHECK-NEXT: "tensor.extract_slice"(%arg0, %arg1, %arg2, %arg3) <{operandSegmentSizes = array<i32: 1, 1, 1, 1>, static_offsets = array<i64: -9223372036854775808, 0>, static_sizes = array<i64: -9223372036854775808, 4>, static_strides = array<i64: -9223372036854775808, 1>}> : (tensor<?x4xf32>, index, index, index) -> tensor<?x4xf32>
+ // CHECK-NEXT: tensor.extract_slice %{{.*}}[%{{.*}}, 0] [%{{.*}}, 4] [%{{.*}}, 1] : tensor<?x4xf32> to tensor<?x4xf32>
// CHECK-NEXT: ^ extract_slice runs out-of-bounds along dimension 0
// CHECK-NEXT: Location: loc({{.*}})
func.call @extract_slice_dynamic(%alloca_4_dyn, %0, %4, %4) : (tensor<?x4xf32>, index, index, index) -> ()
diff --git a/mlir/test/Target/Cpp/do.mlir b/mlir/test/Target/Cpp/do.mlir
new file mode 100644
index 0000000..38cbc81
--- /dev/null
+++ b/mlir/test/Target/Cpp/do.mlir
@@ -0,0 +1,168 @@
+// RUN: mlir-translate -mlir-to-cpp %s | FileCheck %s -check-prefix=CPP-DEFAULT
+
+
+// CPP-DEFAULT-LABEL: void emitc_do(
+// CPP-DEFAULT: int32_t* [[VAL_1:v[0-9]+]]) {
+// CPP-DEFAULT: int32_t [[VAL_2:v[0-9]+]] = 0;
+// CPP-DEFAULT: do {
+// CPP-DEFAULT: printf("%d", *[[VAL_1]]);
+// CPP-DEFAULT: int32_t [[VAL_3:v[0-9]+]] = [[VAL_2]];
+// CPP-DEFAULT: int32_t [[VAL_4:v[0-9]+]] = [[VAL_3]] + 1;
+// CPP-DEFAULT: [[VAL_2]] = [[VAL_4]];
+// CPP-DEFAULT: } while ([[VAL_2]] <= 10);
+// CPP-DEFAULT: return;
+// CPP-DEFAULT: }
+
+emitc.func @emitc_do(%arg0 : !emitc.ptr<i32>) {
+ %var = "emitc.variable"() <{value = 0 : i32}> : () -> !emitc.lvalue<i32>
+ %0 = literal "10" : i32
+ %1 = literal "1" : i32
+
+ do {
+ verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ %var_load = load %var : <i32>
+ %tmp_add = add %var_load, %1 : (i32, i32) -> i32
+ "emitc.assign"(%var, %tmp_add) : (!emitc.lvalue<i32>, i32) -> ()
+ } while {
+ %r = expression %var, %0 : (!emitc.lvalue<i32>, i32) -> i1 {
+ %var_load = load %var : <i32>
+ %cmp = cmp le, %var_load, %0 : (i32, i32) -> i1
+ yield %cmp : i1
+ }
+
+ yield %r : i1
+ }
+
+ return
+}
+
+
+// CPP-DEFAULT-LABEL: void emitc_do_with_expression(
+// CPP-DEFAULT: int32_t* [[VAL_1:v[0-9]+]]) {
+// CPP-DEFAULT: int32_t [[VAL_2:v[0-9]+]] = 0;
+// CPP-DEFAULT: int32_t [[VAL_3:v[0-9]+]] = 10 + 1;
+// CPP-DEFAULT: do {
+// CPP-DEFAULT: printf("%d", *[[VAL_1]]);
+// CPP-DEFAULT: int32_t [[VAL_4:v[0-9]+]] = [[VAL_2]];
+// CPP-DEFAULT: int32_t [[VAL_5:v[0-9]+]] = [[VAL_4]] + 1;
+// CPP-DEFAULT: [[VAL_2]] = [[VAL_5]];
+// CPP-DEFAULT: } while ([[VAL_2]] <= [[VAL_3]]);
+// CPP-DEFAULT: return;
+// CPP-DEFAULT: }
+
+emitc.func @emitc_do_with_expression(%arg0 : !emitc.ptr<i32>) {
+ %var = "emitc.variable"() <{value = 0 : i32}> : () -> !emitc.lvalue<i32>
+ %0 = literal "10" : i32
+ %1 = literal "1" : i32
+
+ %add = expression %0, %1 : (i32, i32) -> i32 {
+ %add = add %0, %1 : (i32, i32) -> i32
+ yield %add : i32
+ }
+
+ do {
+ verbatim "printf(\"%d\", *{});" args %arg0 : !emitc.ptr<i32>
+ %var_load = load %var : <i32>
+ %tmp_add = add %var_load, %1 : (i32, i32) -> i32
+ "emitc.assign"(%var, %tmp_add) : (!emitc.lvalue<i32>, i32) -> ()
+ } while {
+ %r = expression %var, %add : (!emitc.lvalue<i32>, i32) -> i1 {
+ %var_load = load %var : <i32>
+ %cmp = cmp le, %var_load, %add : (i32, i32) -> i1
+ yield %cmp : i1
+ }
+
+ yield %r : i1
+ }
+
+ return
+}
+
+
+// CPP-DEFAULT-LABEL: void emitc_double_do()
+// CPP-DEFAULT: int32_t [[VAL_1:v[0-9]+]] = 0;
+// CPP-DEFAULT: int32_t [[VAL_2:v[0-9]+]] = 0;
+// CPP-DEFAULT: do {
+// CPP-DEFAULT: int32_t [[VAL_3:v[0-9]+]] = [[VAL_1]];
+// CPP-DEFAULT: do {
+// CPP-DEFAULT: int32_t [[VAL_4:v[0-9]+]] = [[VAL_2]];
+// CPP-DEFAULT: printf("i = %d, j = %d", [[VAL_3]], [[VAL_4]]);
+// CPP-DEFAULT: int32_t [[VAL_5:v[0-9]+]] = [[VAL_4]] + 1;
+// CPP-DEFAULT: [[VAL_2]] = [[VAL_5]];
+// CPP-DEFAULT: } while ([[VAL_2]] <= 5);
+// CPP-DEFAULT: int32_t [[VAL_6:v[0-9]+]] = [[VAL_3]] + 1;
+// CPP-DEFAULT: [[VAL_1]] = [[VAL_6]];
+// CPP-DEFAULT: } while ([[VAL_1]] <= 3);
+// CPP-DEFAULT: return;
+// CPP-DEFAULT: }
+
+emitc.func @emitc_double_do() {
+ %var_1 = "emitc.variable"() <{value = 0 : i32}> : () -> !emitc.lvalue<i32>
+ %var_2 = "emitc.variable"() <{value = 0 : i32}> : () -> !emitc.lvalue<i32>
+
+ %step = literal "1" : i32
+ %end_1 = literal "3" : i32
+ %end_2 = literal "5" : i32
+
+ do {
+ %var_1_load = load %var_1 : <i32>
+
+ do {
+ %var_2_load = load %var_2 : <i32>
+ verbatim "printf(\"i = %d, j = %d\", {}, {});" args %var_1_load, %var_2_load : i32, i32
+ %tmp_add = add %var_2_load, %step : (i32, i32) -> i32
+ "emitc.assign"(%var_2, %tmp_add) : (!emitc.lvalue<i32>, i32) -> ()
+ } while {
+ %r = expression %var_2, %end_2 : (!emitc.lvalue<i32>, i32) -> i1 {
+ %var_2_load = load %var_2 : <i32>
+ %cmp = cmp le, %var_2_load, %end_2 : (i32, i32) -> i1
+ yield %cmp : i1
+ }
+
+ yield %r : i1
+ }
+
+ %tmp_add = add %var_1_load, %step : (i32, i32) -> i32
+ "emitc.assign"(%var_1, %tmp_add) : (!emitc.lvalue<i32>, i32) -> ()
+ } while {
+ %r = expression %var_1, %end_1 : (!emitc.lvalue<i32>, i32) -> i1 {
+ %var_1_load = load %var_1 : <i32>
+ %cmp = cmp le, %var_1_load, %end_1 : (i32, i32) -> i1
+ yield %cmp : i1
+ }
+
+ yield %r : i1
+ }
+
+ return
+}
+
+
+// CPP-DEFAULT-LABEL: bool payload_do_with_empty_body(
+// CPP-DEFAULT: int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]]) {
+// CPP-DEFAULT: bool [[VAL_3:v[0-9]+]] = [[VAL_1]] < [[VAL_2]];
+// CPP-DEFAULT: return [[VAL_3]];
+// CPP-DEFAULT: }
+// CPP-DEFAULT: void emitc_do_with_empty_body(
+// CPP-DEFAULT: int32_t [[VAL_1:v[0-9]+]], int32_t [[VAL_2:v[0-9]+]]) {
+// CPP-DEFAULT: do {
+// CPP-DEFAULT: } while (payload_do_with_empty_body([[VAL_1]], [[VAL_2]]));
+// CPP-DEFAULT: return;
+// CPP-DEFAULT: }
+
+emitc.func @payload_do_with_empty_body(%1 : i32, %2 : i32) -> i1 {
+ %cmp = emitc.cmp lt, %1, %2 : (i32, i32) -> i1
+ return %cmp : i1
+}
+func.func @emitc_do_with_empty_body(%arg1 : i32, %arg2 : i32) {
+ emitc.do {
+ } while {
+ %r = emitc.expression %arg1, %arg2 : (i32, i32) -> i1 {
+ %call = emitc.call @payload_do_with_empty_body(%arg1, %arg2) : (i32, i32) -> i1
+ emitc.yield %call : i1
+ }
+ emitc.yield %r: i1
+ }
+
+ return
+}
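The emitc.do op exercised throughout this file carries two regions: a body and a condition region that must yield an i1, with the comparison wrapped in emitc.expression so the emitter can inline it into the while clause. A minimal do-while sketch distilled from the tests above (function name illustrative):

emitc.func @count_to_ten() {
  %var = "emitc.variable"() <{value = 0 : i32}> : () -> !emitc.lvalue<i32>
  %one = literal "1" : i32
  %ten = literal "10" : i32
  do {
    %v = load %var : <i32>
    %next = add %v, %one : (i32, i32) -> i32
    "emitc.assign"(%var, %next) : (!emitc.lvalue<i32>, i32) -> ()
  } while {
    // The condition region yields the i1 loop test.
    %r = expression %var, %ten : (!emitc.lvalue<i32>, i32) -> i1 {
      %v = load %var : <i32>
      %cmp = cmp lt, %v, %ten : (i32, i32) -> i1
      yield %cmp : i1
    }
    yield %r : i1
  }
  return
}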
diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp6x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp6x2.mlir
index 04163b5..9928992 100644
--- a/mlir/test/Target/LLVMIR/nvvm/convert_fp6x2.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp6x2.mlir
@@ -3,9 +3,9 @@
// CHECK-LABEL: @convert_f32x2_to_fp6x2_packed
llvm.func @convert_f32x2_to_fp6x2_packed(%srcA : f32, %srcB : f32) {
//CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e2m3x2.rn.satfinite(float %{{.*}}, float %{{.*}})
- %res1 = nvvm.convert.f32x2.to.f6x2 <e2m3> %srcA, %srcB : i16
+ %res1 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB : i16 (f6E2M3FN)
//CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e3m2x2.rn.satfinite(float %{{.*}}, float %{{.*}})
- %res2 = nvvm.convert.f32x2.to.f6x2 <e3m2> %srcA, %srcB : i16
+ %res2 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB : i16 (f6E3M2FN)
llvm.return
}
@@ -13,9 +13,9 @@ llvm.func @convert_f32x2_to_fp6x2_packed(%srcA : f32, %srcB : f32) {
llvm.func @convert_f32x2_to_fp6x2_vector(%srcA : f32, %srcB : f32) {
//CHECK: %[[res0:.*]] = call i16 @llvm.nvvm.ff.to.e2m3x2.rn.satfinite(float %{{.*}}, float %{{.*}})
//CHECK-NEXT: %{{.*}} = bitcast i16 %[[res0]] to <2 x i8>
- %res1 = nvvm.convert.f32x2.to.f6x2 <e2m3> %srcA, %srcB : vector<2xi8>
+ %res1 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB : vector<2xi8> (f6E2M3FN)
//CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.ff.to.e3m2x2.rn.satfinite(float %{{.*}}, float %{{.*}})
//CHECK-NEXT: %{{.*}} = bitcast i16 %[[res1]] to <2 x i8>
- %res2 = nvvm.convert.f32x2.to.f6x2 <e3m2> %srcA, %srcB : vector<2xi8>
+ %res2 = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB : vector<2xi8> (f6E3M2FN)
llvm.return
}
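The syntax change in this file swaps the conversion-kind enum for the destination element type, spelled as a builtin float type in trailing parentheses: <e2m3> becomes (f6E2M3FN) and <e3m2> becomes (f6E3M2FN). Old and new spellings side by side, with operands as above:

// old: %res = nvvm.convert.f32x2.to.f6x2 <e2m3> %srcA, %srcB : i16
%res = nvvm.convert.f32x2.to.f6x2 %srcA, %srcB : i16 (f6E2M3FN)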
diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2.mlir
index 4a15efb..de21826 100644
--- a/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2.mlir
+++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp8x2.mlir
@@ -5,31 +5,31 @@
// CHECK-LABEL: @convert_f32x2_to_f8x2_e4m3
llvm.func @convert_f32x2_to_f8x2_e4m3(%srcA : f32, %srcB : f32) {
// CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e4m3x2.rn(float %{{.*}}, float %{{.*}})
- %res1 = nvvm.convert.f32x2.to.f8x2 <e4m3> %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : i16
+ %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E4M3FN)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e4m3x2.rn.relu(float %{{.*}}, float %{{.*}})
- %res2 = nvvm.convert.f32x2.to.f8x2 <e4m3> %srcA, %srcB {relu = true, rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : i16
+ %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {relu = true, rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E4M3FN)
llvm.return
}
// CHECK-LABEL: @convert_f32x2_to_f8x2_e5m2
llvm.func @convert_f32x2_to_f8x2_e5m2(%srcA : f32, %srcB : f32) {
// CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e5m2x2.rn(float %{{.*}}, float %{{.*}})
- %res1 = nvvm.convert.f32x2.to.f8x2 <e5m2> %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : i16
+ %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E5M2)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.e5m2x2.rn.relu(float %{{.*}}, float %{{.*}})
- %res2 = nvvm.convert.f32x2.to.f8x2 <e5m2> %srcA, %srcB {relu = true, rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : i16
+ %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {relu = true, rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E5M2)
llvm.return
}
// CHECK-LABEL: @convert_f32x2_to_f8x2_ue8m0
llvm.func @convert_f32x2_to_f8x2_ue8m0(%srcA : f32, %srcB : f32) {
// CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.ue8m0x2.rz(float %{{.*}}, float %{{.*}})
- %res1 = nvvm.convert.f32x2.to.f8x2 <ue8m0> %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rz>} : i16
+ %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rz>} : i16 (f8E8M0FNU)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.ue8m0x2.rp(float %{{.*}}, float %{{.*}})
- %res2 = nvvm.convert.f32x2.to.f8x2 <ue8m0> %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rp>} : i16
+ %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rp>} : i16 (f8E8M0FNU)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.ue8m0x2.rz.satfinite(float %{{.*}}, float %{{.*}})
- %res3 = nvvm.convert.f32x2.to.f8x2 <ue8m0> %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<satfinite>} : i16
+ %res3 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E8M0FNU)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.ff.to.ue8m0x2.rp.satfinite(float %{{.*}}, float %{{.*}})
- %res4 = nvvm.convert.f32x2.to.f8x2 <ue8m0> %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<satfinite>} : i16
+ %res4 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E8M0FNU)
llvm.return
}
@@ -37,10 +37,10 @@ llvm.func @convert_f32x2_to_f8x2_ue8m0(%srcA : f32, %srcB : f32) {
llvm.func @convert_f32x2_to_f8x2_vector_return(%srcA : f32, %srcB : f32) {
// CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.ff.to.e4m3x2.rn(float %{{.*}}, float %{{.*}})
// CHECK-NEXT: %{{.*}} = bitcast i16 %[[res1]] to <2 x i8>
- %res1 = nvvm.convert.f32x2.to.f8x2 <e4m3> %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : vector<2xi8>
+ %res1 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : vector<2xi8> (f8E4M3FN)
// CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.ff.to.e4m3x2.rn.relu(float %{{.*}}, float %{{.*}})
// CHECK-NEXT: %{{.*}} = bitcast i16 %[[res2]] to <2 x i8>
- %res2 = nvvm.convert.f32x2.to.f8x2 <e4m3> %srcA, %srcB {relu = true, rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : vector<2xi8>
+ %res2 = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {relu = true, rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<satfinite>} : vector<2xi8> (f8E4M3FN)
llvm.return
}
@@ -49,18 +49,18 @@ llvm.func @convert_f32x2_to_f8x2_vector_return(%srcA : f32, %srcB : f32) {
// CHECK-LABEL: @convert_f16x2_to_f8x2_e4m3
llvm.func @convert_f16x2_to_f8x2_e4m3(%src : vector<2xf16>) {
// CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn(<2 x half> %{{.*}})
- %res1 = nvvm.convert.f16x2.to.f8x2 <e4m3> %src : vector<2xf16> -> i16
+ %res1 = nvvm.convert.f16x2.to.f8x2 %src : vector<2xf16> -> i16 (f8E4M3FN)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn.relu(<2 x half> %{{.*}})
- %res2 = nvvm.convert.f16x2.to.f8x2 <e4m3> %src {relu = true} : vector<2xf16> -> i16
+ %res2 = nvvm.convert.f16x2.to.f8x2 %src {relu = true} : vector<2xf16> -> i16 (f8E4M3FN)
llvm.return
}
// CHECK-LABEL: @convert_f16x2_to_f8x2_e5m2
llvm.func @convert_f16x2_to_f8x2_e5m2(%src : vector<2xf16>) {
// CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn(<2 x half> %{{.*}})
- %res1 = nvvm.convert.f16x2.to.f8x2 <e5m2> %src : vector<2xf16> -> i16
+ %res1 = nvvm.convert.f16x2.to.f8x2 %src : vector<2xf16> -> i16 (f8E5M2)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn.relu(<2 x half> %{{.*}})
- %res2 = nvvm.convert.f16x2.to.f8x2 <e5m2> %src {relu = true} : vector<2xf16> -> i16
+ %res2 = nvvm.convert.f16x2.to.f8x2 %src {relu = true} : vector<2xf16> -> i16 (f8E5M2)
llvm.return
}
@@ -68,10 +68,10 @@ llvm.func @convert_f16x2_to_f8x2_e5m2(%src : vector<2xf16>) {
llvm.func @convert_f16x2_to_f8x2_vector_return(%src : vector<2xf16>) {
// CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn(<2 x half> %{{.*}})
// CHECK-NEXT: %{{.*}} = bitcast i16 %[[res1]] to <2 x i8>
- %res1 = nvvm.convert.f16x2.to.f8x2 <e4m3> %src : vector<2xf16> -> vector<2xi8>
+ %res1 = nvvm.convert.f16x2.to.f8x2 %src : vector<2xf16> -> vector<2xi8> (f8E4M3FN)
// CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn(<2 x half> %{{.*}})
// CHECK-NEXT: %{{.*}} = bitcast i16 %[[res2]] to <2 x i8>
- %res2 = nvvm.convert.f16x2.to.f8x2 <e5m2> %src : vector<2xf16> -> vector<2xi8>
+ %res2 = nvvm.convert.f16x2.to.f8x2 %src : vector<2xf16> -> vector<2xi8> (f8E5M2)
llvm.return
}
@@ -80,13 +80,13 @@ llvm.func @convert_f16x2_to_f8x2_vector_return(%src : vector<2xf16>) {
// CHECK-LABEL: @convert_bf16x2_to_f8x2_ue8m0
llvm.func @convert_bf16x2_to_f8x2_ue8m0(%src : vector<2xbf16>) {
// CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rz(<2 x bfloat> %{{.*}})
- %res1 = nvvm.convert.bf16x2.to.f8x2 <ue8m0> %src {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16> -> i16
+ %res1 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16> -> i16 (f8E8M0FNU)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rp(<2 x bfloat> %{{.*}})
- %res2 = nvvm.convert.bf16x2.to.f8x2 <ue8m0> %src {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16> -> i16
+ %res2 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rp>} : vector<2xbf16> -> i16 (f8E8M0FNU)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rz.satfinite(<2 x bfloat> %{{.*}})
- %res3 = nvvm.convert.bf16x2.to.f8x2 <ue8m0> %src {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16> -> i16
+ %res3 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16> -> i16 (f8E8M0FNU)
// CHECK: %{{.*}} = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rp.satfinite(<2 x bfloat> %{{.*}})
- %res4 = nvvm.convert.bf16x2.to.f8x2 <ue8m0> %src {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16> -> i16
+ %res4 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16> -> i16 (f8E8M0FNU)
llvm.return
}
@@ -94,9 +94,9 @@ llvm.func @convert_bf16x2_to_f8x2_ue8m0(%src : vector<2xbf16>) {
llvm.func @convert_bf16x2_to_f8x2_vector_return(%src : vector<2xbf16>) {
// CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rz(<2 x bfloat> %{{.*}})
// CHECK-NEXT: %{{.*}} = bitcast i16 %[[res1]] to <2 x i8>
- %res1 = nvvm.convert.bf16x2.to.f8x2 <ue8m0> %src {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16> -> vector<2xi8>
+ %res1 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU)
// CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.bf16x2.to.ue8m0x2.rp.satfinite(<2 x bfloat> %{{.*}})
// CHECK-NEXT: %{{.*}} = bitcast i16 %[[res2]] to <2 x i8>
- %res2 = nvvm.convert.bf16x2.to.f8x2 <ue8m0> %src {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16> -> vector<2xi8>
+ %res2 = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16> -> vector<2xi8> (f8E8M0FNU)
llvm.return
}
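The f8x2 conversions follow the same scheme, with <e4m3>, <e5m2>, and <ue8m0> mapping to the builtin types f8E4M3FN, f8E5M2, and f8E8M0FNU respectively; the rnd, sat, and relu attributes are unchanged. For example, with operands as above:

// old: %res = nvvm.convert.f32x2.to.f8x2 <ue8m0> %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rz>} : i16
%res = nvvm.convert.f32x2.to.f8x2 %srcA, %srcB {rnd = #nvvm.fp_rnd_mode<rz>} : i16 (f8E8M0FNU)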
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index 383f482..0b36154 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -175,64 +175,64 @@ llvm.func @nvvm_match_sync_any(%val32: i32, %thread_mask: i32) {
// -----
llvm.func @nvvm_cvt_float_to_f8x2_invalid_rounding_e4m3(%a : f32, %b : f32) {
- // expected-error @below {{Only RN rounding mode is supported for conversions from f32x2 to .e4m3x2 or .e5m2x2 types}}
- %res = nvvm.convert.f32x2.to.f8x2 <e4m3> %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<satfinite>} : i16
+ // expected-error @below {{Only RN rounding mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}}
+ %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode<rz>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E4M3FN)
llvm.return
}
// -----
llvm.func @nvvm_cvt_float_to_f8x2_invalid_rounding_e5m2(%a : f32, %b : f32) {
- // expected-error @below {{Only RN rounding mode is supported for conversions from f32x2 to .e4m3x2 or .e5m2x2 types}}
- %res = nvvm.convert.f32x2.to.f8x2 <e5m2> %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<satfinite>} : i16
+ // expected-error @below {{Only RN rounding mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}}
+ %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E5M2)
llvm.return
}
// -----
llvm.func @nvvm_cvt_float_to_f8x2_invalid_rounding_ue8m0(%a : f32, %b : f32) {
- // expected-error @below {{Only RZ or RP rounding modes are supported for conversions from f32x2 to .ue8m0x2 type}}
- %res = nvvm.convert.f32x2.to.f8x2 <ue8m0> %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : i16
+ // expected-error @below {{Only RZ and RP rounding modes are supported for conversions from f32x2 to 'f8E8M0FNU' type}}
+ %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode<rn>} : i16 (f8E8M0FNU)
llvm.return
}
// -----
llvm.func @nvvm_cvt_float_to_f8x2_invalid_saturation_e4m3(%a : f32, %b : f32) {
- // expected-error @below {{Only SATFINITE saturation mode is supported for conversions from f32x2 to .e4m3x2 or .e5m2x2 types}}
- %res = nvvm.convert.f32x2.to.f8x2 <e4m3> %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<none>} : i16
+ // expected-error @below {{Only SATFINITE saturation mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}}
+ %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<none>} : i16 (f8E4M3FN)
llvm.return
}
// -----
llvm.func @nvvm_cvt_float_to_f8x2_invalid_saturation_e5m2(%a : f32, %b : f32) {
- // expected-error @below {{Only SATFINITE saturation mode is supported for conversions from f32x2 to .e4m3x2 or .e5m2x2 types}}
- %res = nvvm.convert.f32x2.to.f8x2 <e5m2> %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<none>} : i16
+ // expected-error @below {{Only SATFINITE saturation mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}}
+ %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode<rn>, sat = #nvvm.sat_mode<none>} : i16 (f8E5M2)
llvm.return
}
// -----
llvm.func @nvvm_cvt_float_to_f8x2_relu_not_supported_ue8m0(%a : f32, %b : f32) {
- // expected-error @below {{relu not supported for conversions to .ue8m0x2 type}}
- %res = nvvm.convert.f32x2.to.f8x2 <ue8m0> %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, relu = true} : i16
+ // expected-error @below {{relu not supported for conversions to 'f8E8M0FNU' type}}
+ %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode<rp>, relu = true} : i16 (f8E8M0FNU)
llvm.return
}
// -----
llvm.func @nvvm_cvt_f16x2_to_f8x2_invalid_type(%src : vector<2xf16>) {
- // expected-error @below {{Only .e4m3 or .e5m2 types are supported for conversions from f16x2 to f8x2.}}
- %res = nvvm.convert.f16x2.to.f8x2 <ue8m0> %src : vector<2xf16> -> i16
+ // expected-error @below {{Only 'f8E4M3FN' and 'f8E5M2' types are supported for conversions from f16x2 to f8x2.}}
+ %res = nvvm.convert.f16x2.to.f8x2 %src : vector<2xf16> -> i16 (f8E8M0FNU)
llvm.return
}
// -----
llvm.func @nvvm_cvt_bf16x2_to_f8x2_invalid_type(%src : vector<2xbf16>) {
- // expected-error @below {{Only .ue8m0 type is supported for conversions from bf16x2 to f8x2.}}
- %res = nvvm.convert.bf16x2.to.f8x2 <e4m3> %src {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16> -> i16
+ // expected-error @below {{Only 'f8E8M0FNU' type is supported for conversions from bf16x2 to f8x2.}}
+ %res = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16> -> i16 (f8E4M3FN)
llvm.return
}
@@ -240,7 +240,15 @@ llvm.func @nvvm_cvt_bf16x2_to_f8x2_invalid_type(%src : vector<2xbf16>) {
llvm.func @nvvm_cvt_bf16x2_to_f8x2_invalid_rounding(%src : vector<2xbf16>) {
// expected-error @below {{Only RZ and RP rounding modes are supported for conversions from bf16x2 to f8x2.}}
- %res = nvvm.convert.bf16x2.to.f8x2 <ue8m0> %src {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16> -> i16
+ %res = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xbf16> -> i16 (f8E8M0FNU)
+ llvm.return
+}
+
+// -----
+
+llvm.func @nvvm_cvt_f32x2_to_f6x2_invalid_type(%a : f32, %b : f32) {
+ // expected-error @below {{Only 'f6E2M3FN' and 'f6E3M2FN' types are supported for conversions from f32x2 to f6x2.}}
+ %res = nvvm.convert.f32x2.to.f6x2 %a, %b : i16 (f8E8M0FNU)
llvm.return
}
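The new negative test above feeds an f8 type to the f6x2 conversion. For contrast, a valid target type, taken from convert_fp6x2.mlir earlier in this diff, with operands as above:

%ok = nvvm.convert.f32x2.to.f6x2 %a, %b : i16 (f6E2M3FN)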
diff --git a/mlir/test/lib/Analysis/DataFlow/TestDenseBackwardDataFlowAnalysis.cpp b/mlir/test/lib/Analysis/DataFlow/TestDenseBackwardDataFlowAnalysis.cpp
index d57b41c..eb0d980 100644
--- a/mlir/test/lib/Analysis/DataFlow/TestDenseBackwardDataFlowAnalysis.cpp
+++ b/mlir/test/lib/Analysis/DataFlow/TestDenseBackwardDataFlowAnalysis.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "TestDenseDataFlowAnalysis.h"
-#include "TestDialect.h"
#include "TestOps.h"
#include "mlir/Analysis/DataFlow/DenseAnalysis.h"
#include "mlir/Analysis/DataFlow/Utils.h"
@@ -23,12 +22,15 @@
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/TypeID.h"
+#include "llvm/Support/DebugLog.h"
#include "llvm/Support/raw_ostream.h"
using namespace mlir;
using namespace mlir::dataflow;
using namespace mlir::dataflow::test;
+#define DEBUG_TYPE "test-next-access"
+
namespace {
class NextAccess : public AbstractDenseLattice, public AccessLatticeBase {
@@ -72,6 +74,7 @@ public:
// means "we don't know what the next access is" rather than "there is no next
// access". But it's unclear how to differentiate the two cases...
void setToExitState(NextAccess *lattice) override {
+ LDBG() << "setToExitState: setting lattice to unknown state";
propagateIfChanged(lattice, lattice->setKnownToUnknown());
}
@@ -87,16 +90,23 @@ public:
LogicalResult NextAccessAnalysis::visitOperation(Operation *op,
const NextAccess &after,
NextAccess *before) {
+ LDBG() << "visitOperation: "
+ << OpWithFlags(op, OpPrintingFlags().skipRegions());
+ LDBG() << " after state: " << after;
+ LDBG() << " before state: " << *before;
+
auto memory = dyn_cast<MemoryEffectOpInterface>(op);
// If we can't reason about the memory effects, conservatively assume we can't
// say anything about the next access.
if (!memory) {
+ LDBG() << " No memory effect interface, setting to exit state";
setToExitState(before);
return success();
}
SmallVector<MemoryEffects::EffectInstance> effects;
memory.getEffects(effects);
+ LDBG() << " Found " << effects.size() << " memory effects";
// First, check if all underlying values are already known. Otherwise, avoid
// propagating and stay in the "undefined" state to avoid incorrectly
@@ -110,6 +120,7 @@ LogicalResult NextAccessAnalysis::visitOperation(Operation *op,
// Effects with unspecified value are treated conservatively and we cannot
// assume anything about the next access.
if (!value) {
+ LDBG() << " Effect has unspecified value, setting to exit state";
setToExitState(before);
return success();
}
@@ -124,38 +135,63 @@ LogicalResult NextAccessAnalysis::visitOperation(Operation *op,
});
// If the underlying value is not known yet, don't propagate.
- if (!underlyingValue)
+ if (!underlyingValue) {
+ LDBG() << " Underlying value not known for " << value
+ << ", skipping propagation";
return success();
+ }
+ LDBG() << " Found underlying value " << *underlyingValue << " for "
+ << value;
underlyingValues.push_back(*underlyingValue);
}
// Update the state if all underlying values are known.
+ LDBG() << " All underlying values known, updating state";
ChangeResult result = before->meet(after);
for (const auto &[effect, value] : llvm::zip(effects, underlyingValues)) {
// If the underlying value is known to be unknown, set to fixpoint.
if (!value) {
+ LDBG() << " Underlying value is unknown, setting to exit state";
setToExitState(before);
return success();
}
+ LDBG() << " Setting next access for value " << value << " to operation "
+ << OpWithFlags(op, OpPrintingFlags().skipRegions());
result |= before->set(value, op);
}
+ LDBG() << " Final result: "
+ << (result == ChangeResult::Change ? "changed" : "no change");
propagateIfChanged(before, result);
return success();
}
void NextAccessAnalysis::buildOperationEquivalentLatticeAnchor(Operation *op) {
+ LDBG() << "buildOperationEquivalentLatticeAnchor: "
+ << OpWithFlags(op, OpPrintingFlags().skipRegions());
if (isMemoryEffectFree(op)) {
+ LDBG() << " Operation is memory effect free, unioning lattice anchors";
unionLatticeAnchors<NextAccess>(getProgramPointBefore(op),
getProgramPointAfter(op));
+ } else {
+ LDBG() << " Operation has memory effects, not unioning lattice anchors";
}
}
void NextAccessAnalysis::visitCallControlFlowTransfer(
CallOpInterface call, CallControlFlowAction action, const NextAccess &after,
NextAccess *before) {
+ LDBG() << "visitCallControlFlowTransfer: "
+ << OpWithFlags(call.getOperation(), OpPrintingFlags().skipRegions());
+ LDBG() << " action: "
+ << (action == CallControlFlowAction::ExternalCallee ? "ExternalCallee"
+ : action == CallControlFlowAction::EnterCallee ? "EnterCallee"
+ : "ExitCallee");
+ LDBG() << " assumeFuncReads: " << assumeFuncReads;
+
if (action == CallControlFlowAction::ExternalCallee && assumeFuncReads) {
+ LDBG() << " Handling external callee with assumed function reads";
SmallVector<Value> underlyingValues;
underlyingValues.reserve(call->getNumOperands());
for (Value operand : call.getArgOperands()) {
@@ -165,15 +201,26 @@ void NextAccessAnalysis::visitCallControlFlowTransfer(
return getOrCreateFor<UnderlyingValueLattice>(
getProgramPointBefore(call.getOperation()), value);
});
- if (!underlyingValue)
+ if (!underlyingValue) {
+ LDBG() << " Underlying value not known for operand " << operand
+ << ", returning";
return;
+ }
+ LDBG() << " Found underlying value " << *underlyingValue
+ << " for operand " << operand;
underlyingValues.push_back(*underlyingValue);
}
+ LDBG() << " Setting next access for " << underlyingValues.size()
+ << " operands";
ChangeResult result = before->meet(after);
for (Value operand : underlyingValues) {
+ LDBG() << " Setting next access for operand " << operand << " to call "
+ << call;
result |= before->set(operand, call);
}
+ LDBG() << " Call control flow result: "
+ << (result == ChangeResult::Change ? "changed" : "no change");
return propagateIfChanged(before, result);
}
auto testCallAndStore =
@@ -182,8 +229,10 @@ void NextAccessAnalysis::visitCallControlFlowTransfer(
testCallAndStore.getStoreBeforeCall()) ||
(action == CallControlFlowAction::ExitCallee &&
!testCallAndStore.getStoreBeforeCall()))) {
+ LDBG() << " Handling TestCallAndStoreOp with special logic";
(void)visitOperation(call, after, before);
} else {
+ LDBG() << " Using default call control flow transfer logic";
AbstractDenseBackwardDataFlowAnalysis::visitCallControlFlowTransfer(
call, action, after, before);
}
@@ -192,6 +241,11 @@ void NextAccessAnalysis::visitCallControlFlowTransfer(
void NextAccessAnalysis::visitRegionBranchControlFlowTransfer(
RegionBranchOpInterface branch, RegionBranchPoint regionFrom,
RegionBranchPoint regionTo, const NextAccess &after, NextAccess *before) {
+ LDBG() << "visitRegionBranchControlFlowTransfer: "
+ << OpWithFlags(branch.getOperation(), OpPrintingFlags().skipRegions());
+ LDBG() << " regionFrom: " << (regionFrom.isParent() ? "parent" : "region");
+ LDBG() << " regionTo: " << (regionTo.isParent() ? "parent" : "region");
+
auto testStoreWithARegion =
dyn_cast<::test::TestStoreWithARegion>(branch.getOperation());
@@ -199,9 +253,11 @@ void NextAccessAnalysis::visitRegionBranchControlFlowTransfer(
((regionTo.isParent() && !testStoreWithARegion.getStoreBeforeRegion()) ||
(regionFrom.isParent() &&
testStoreWithARegion.getStoreBeforeRegion()))) {
+ LDBG() << " Handling TestStoreWithARegion with special logic";
(void)visitOperation(branch, static_cast<const NextAccess &>(after),
static_cast<NextAccess *>(before));
} else {
+ LDBG() << " Using default region branch control flow transfer logic";
propagateIfChanged(before, before->meet(after));
}
}
@@ -278,6 +334,11 @@ struct TestNextAccessPass
void runOnOperation() override {
Operation *op = getOperation();
+ LDBG() << "runOnOperation: Starting test-next-access pass on "
+ << OpWithFlags(op, OpPrintingFlags().skipRegions());
+ LDBG() << " interprocedural: " << interprocedural;
+ LDBG() << " assumeFuncReads: " << assumeFuncReads;
+
SymbolTableCollection symbolTable;
auto config = DataFlowConfig().setInterprocedural(interprocedural);
@@ -285,15 +346,20 @@ struct TestNextAccessPass
loadBaselineAnalyses(solver);
solver.load<NextAccessAnalysis>(symbolTable, assumeFuncReads);
solver.load<UnderlyingValueAnalysis>();
+ LDBG() << " Initializing and running dataflow solver";
if (failed(solver.initializeAndRun(op))) {
emitError(op->getLoc(), "dataflow solver failed");
return signalPassFailure();
}
+ LDBG() << " Dataflow solver completed successfully";
+ LDBG() << " Walking operations to set next access attributes";
op->walk([&](Operation *op) {
auto tag = op->getAttrOfType<StringAttr>(kTagAttrName);
if (!tag)
return;
+ LDBG() << " Processing tagged operation: "
+ << OpWithFlags(op, OpPrintingFlags().skipRegions());
const NextAccess *nextAccess =
solver.lookupState<NextAccess>(solver.getProgramPointAfter(op));
op->setAttr(kNextAccessAttrName,
diff --git a/mlir/test/lib/Dialect/CMakeLists.txt b/mlir/test/lib/Dialect/CMakeLists.txt
index 3b7bd9b..e31140a 100644
--- a/mlir/test/lib/Dialect/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/CMakeLists.txt
@@ -12,6 +12,7 @@ add_subdirectory(Math)
add_subdirectory(MemRef)
add_subdirectory(Shard)
add_subdirectory(NVGPU)
+add_subdirectory(OpenACC)
add_subdirectory(SCF)
add_subdirectory(Shape)
add_subdirectory(SPIRV)
diff --git a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
new file mode 100644
index 0000000..f84055d
--- /dev/null
+++ b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_mlir_library(MLIROpenACCTestPasses
+ TestOpenACC.cpp
+ TestPointerLikeTypeInterface.cpp
+
+ EXCLUDE_FROM_LIBMLIR
+)
+mlir_target_link_libraries(MLIROpenACCTestPasses PUBLIC
+ MLIRIR
+ MLIRArithDialect
+ MLIRFuncDialect
+ MLIRMemRefDialect
+ MLIROpenACCDialect
+ MLIRPass
+ MLIRSupport
+)
+
diff --git a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
new file mode 100644
index 0000000..9886240
--- /dev/null
+++ b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
@@ -0,0 +1,23 @@
+//===- TestOpenACC.cpp - OpenACC Test Registration ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains unified registration for all OpenACC test passes.
+//
+//===----------------------------------------------------------------------===//
+
+namespace mlir {
+namespace test {
+
+// Forward declarations of individual test pass registration functions
+void registerTestPointerLikeTypeInterfacePass();
+
+// Unified registration function for all OpenACC tests
+void registerTestOpenACC() { registerTestPointerLikeTypeInterfacePass(); }
+
+} // namespace test
+} // namespace mlir
diff --git a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
new file mode 100644
index 0000000..85f9283
--- /dev/null
+++ b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
@@ -0,0 +1,305 @@
+//===- TestPointerLikeTypeInterface.cpp - Test PointerLikeType interface -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains test passes for testing the OpenACC PointerLikeType
+// interface methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace mlir;
+using namespace mlir::acc;
+
+namespace {
+
+struct OperationTracker : public OpBuilder::Listener {
+ SmallVector<Operation *> insertedOps;
+
+ void notifyOperationInserted(Operation *op,
+ OpBuilder::InsertPoint previous) override {
+ insertedOps.push_back(op);
+ }
+};
+
+struct TestPointerLikeTypeInterfacePass
+ : public PassWrapper<TestPointerLikeTypeInterfacePass,
+ OperationPass<func::FuncOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestPointerLikeTypeInterfacePass)
+
+ TestPointerLikeTypeInterfacePass() = default;
+ TestPointerLikeTypeInterfacePass(const TestPointerLikeTypeInterfacePass &pass)
+ : PassWrapper(pass) {
+ testMode = pass.testMode;
+ }
+
+ Pass::Option<std::string> testMode{
+ *this, "test-mode",
+ llvm::cl::desc("Test mode: walk, alloc, copy, or free"),
+ llvm::cl::init("walk")};
+
+ StringRef getArgument() const override {
+ return "test-acc-pointer-like-interface";
+ }
+
+ StringRef getDescription() const override {
+ return "Test OpenACC PointerLikeType interface methods on any implementing "
+ "type";
+ }
+
+ void runOnOperation() override;
+
+ void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert<acc::OpenACCDialect>();
+ registry.insert<arith::ArithDialect>();
+ registry.insert<memref::MemRefDialect>();
+ }
+
+private:
+ void walkAndPrint();
+ void testGenAllocate(Operation *op, Value result, PointerLikeType pointerType,
+ OpBuilder &builder);
+ void testGenFree(Operation *op, Value result, PointerLikeType pointerType,
+ OpBuilder &builder);
+ void testGenCopy(Operation *srcOp, Operation *destOp, Value srcResult,
+ Value destResult, PointerLikeType pointerType,
+ OpBuilder &builder);
+
+ struct PointerCandidate {
+ Operation *op;
+ Value result;
+ PointerLikeType pointerType;
+ };
+};
+
+void TestPointerLikeTypeInterfacePass::runOnOperation() {
+ if (testMode == "walk") {
+ walkAndPrint();
+ return;
+ }
+
+ auto func = getOperation();
+ OpBuilder builder(&getContext());
+
+ if (testMode == "alloc" || testMode == "free") {
+ // Collect all candidates first
+ SmallVector<PointerCandidate> candidates;
+ func.walk([&](Operation *op) {
+ if (op->hasAttr("test.ptr")) {
+ for (auto result : op->getResults()) {
+ if (isa<PointerLikeType>(result.getType())) {
+ candidates.push_back(
+ {op, result, cast<PointerLikeType>(result.getType())});
+ break; // Only take the first PointerLikeType result
+ }
+ }
+ }
+ });
+
+ // Now test all candidates
+ for (const auto &candidate : candidates) {
+ if (testMode == "alloc")
+ testGenAllocate(candidate.op, candidate.result, candidate.pointerType,
+ builder);
+ else if (testMode == "free")
+ testGenFree(candidate.op, candidate.result, candidate.pointerType,
+ builder);
+ }
+ } else if (testMode == "copy") {
+ // Collect all source and destination candidates
+ SmallVector<PointerCandidate> sources, destinations;
+
+ func.walk([&](Operation *op) {
+ if (op->hasAttr("test.src_ptr")) {
+ for (auto result : op->getResults()) {
+ if (isa<PointerLikeType>(result.getType())) {
+ sources.push_back(
+ {op, result, cast<PointerLikeType>(result.getType())});
+ break;
+ }
+ }
+ }
+ if (op->hasAttr("test.dest_ptr")) {
+ for (auto result : op->getResults()) {
+ if (isa<PointerLikeType>(result.getType())) {
+ destinations.push_back(
+ {op, result, cast<PointerLikeType>(result.getType())});
+ break;
+ }
+ }
+ }
+ });
+
+ // Try copying from each source to each destination
+ for (const auto &src : sources)
+ for (const auto &dest : destinations)
+ testGenCopy(src.op, dest.op, src.result, dest.result, src.pointerType,
+ builder);
+ }
+}
+
+void TestPointerLikeTypeInterfacePass::walkAndPrint() {
+ auto func = getOperation();
+
+ func.walk([&](Operation *op) {
+ // Look for operations marked with "test.ptr", "test.src_ptr", or
+ // "test.dest_ptr"
+ if (op->hasAttr("test.ptr") || op->hasAttr("test.src_ptr") ||
+ op->hasAttr("test.dest_ptr")) {
+ llvm::errs() << "Operation: ";
+ op->print(llvm::errs());
+ llvm::errs() << "\n";
+
+ // Check each result to see if it's a PointerLikeType
+ for (auto result : op->getResults()) {
+ if (isa<PointerLikeType>(result.getType())) {
+ llvm::errs() << " Result " << result.getResultNumber()
+ << " is PointerLikeType: ";
+ result.getType().print(llvm::errs());
+ llvm::errs() << "\n";
+ } else {
+ llvm::errs() << " Result " << result.getResultNumber()
+ << " is NOT PointerLikeType: ";
+ result.getType().print(llvm::errs());
+ llvm::errs() << "\n";
+ }
+ }
+
+ if (op->getNumResults() == 0)
+ llvm::errs() << " Operation has no results\n";
+
+ llvm::errs() << "\n";
+ }
+ });
+}
+
+void TestPointerLikeTypeInterfacePass::testGenAllocate(
+ Operation *op, Value result, PointerLikeType pointerType,
+ OpBuilder &builder) {
+ Location loc = op->getLoc();
+
+ // Create a new builder with the listener and set insertion point
+ OperationTracker tracker;
+ OpBuilder newBuilder(op->getContext());
+ newBuilder.setListener(&tracker);
+ newBuilder.setInsertionPointAfter(op);
+
+ // Call the genAllocate API
+ Value allocRes = pointerType.genAllocate(newBuilder, loc, "test_alloc",
+ result.getType(), result);
+
+ if (allocRes) {
+ llvm::errs() << "Successfully generated alloc for operation: ";
+ op->print(llvm::errs());
+ llvm::errs() << "\n";
+
+ // Print all operations that were inserted
+ for (Operation *insertedOp : tracker.insertedOps) {
+ llvm::errs() << "\tGenerated: ";
+ insertedOp->print(llvm::errs());
+ llvm::errs() << "\n";
+ }
+ } else {
+ llvm::errs() << "Failed to generate alloc for operation: ";
+ op->print(llvm::errs());
+ llvm::errs() << "\n";
+ }
+}
+
+void TestPointerLikeTypeInterfacePass::testGenFree(Operation *op, Value result,
+ PointerLikeType pointerType,
+ OpBuilder &builder) {
+ Location loc = op->getLoc();
+
+ // Create a new builder with the listener and set insertion point
+ OperationTracker tracker;
+ OpBuilder newBuilder(op->getContext());
+ newBuilder.setListener(&tracker);
+ newBuilder.setInsertionPointAfter(op);
+
+ // Call the genFree API
+ auto typedResult = cast<TypedValue<PointerLikeType>>(result);
+ bool success =
+ pointerType.genFree(newBuilder, loc, typedResult, result.getType());
+
+ if (success) {
+ llvm::errs() << "Successfully generated free for operation: ";
+ op->print(llvm::errs());
+ llvm::errs() << "\n";
+
+ // Print all operations that were inserted
+ for (Operation *insertedOp : tracker.insertedOps) {
+ llvm::errs() << "\tGenerated: ";
+ insertedOp->print(llvm::errs());
+ llvm::errs() << "\n";
+ }
+ } else {
+ llvm::errs() << "Failed to generate free for operation: ";
+ op->print(llvm::errs());
+ llvm::errs() << "\n";
+ }
+}
+
+void TestPointerLikeTypeInterfacePass::testGenCopy(
+ Operation *srcOp, Operation *destOp, Value srcResult, Value destResult,
+ PointerLikeType pointerType, OpBuilder &builder) {
+ Location loc = destOp->getLoc();
+
+ // Create a new builder with the listener and set insertion point
+ OperationTracker tracker;
+ OpBuilder newBuilder(destOp->getContext());
+ newBuilder.setListener(&tracker);
+ newBuilder.setInsertionPointAfter(destOp);
+
+ // Call the genCopy API with the provided source and destination
+ auto typedSrc = cast<TypedValue<PointerLikeType>>(srcResult);
+ auto typedDest = cast<TypedValue<PointerLikeType>>(destResult);
+ bool success = pointerType.genCopy(newBuilder, loc, typedDest, typedSrc,
+ srcResult.getType());
+
+ if (success) {
+ llvm::errs() << "Successfully generated copy from source: ";
+ srcOp->print(llvm::errs());
+ llvm::errs() << " to destination: ";
+ destOp->print(llvm::errs());
+ llvm::errs() << "\n";
+
+ // Print all operations that were inserted
+ for (Operation *insertedOp : tracker.insertedOps) {
+ llvm::errs() << "\tGenerated: ";
+ insertedOp->print(llvm::errs());
+ llvm::errs() << "\n";
+ }
+ } else {
+ llvm::errs() << "Failed to generate copy from source: ";
+ srcOp->print(llvm::errs());
+ llvm::errs() << " to destination: ";
+ destOp->print(llvm::errs());
+ llvm::errs() << "\n";
+ }
+}
+
+} // namespace
+
+//===----------------------------------------------------------------------===//
+// Pass Registration
+//===----------------------------------------------------------------------===//
+
+namespace mlir {
+namespace test {
+void registerTestPointerLikeTypeInterfacePass() {
+ PassRegistration<TestPointerLikeTypeInterfacePass>();
+}
+} // namespace test
+} // namespace mlir
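A hypothetical input for the new pass, using the test.ptr marker it walks for. The pass argument and attribute name come from the code above; the RUN line, function name, and the choice of memref.alloca (assuming memref types implement the OpenACC PointerLikeType interface) are illustrative:

// RUN: mlir-opt %s -test-acc-pointer-like-interface="test-mode=alloc"
func.func @candidates() {
  // Marked so the pass picks this result up as a PointerLikeType candidate.
  %a = memref.alloca() {test.ptr} : memref<10xf32>
  return
}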
diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py
index 5f92f5b..8e6208e 100644
--- a/mlir/test/python/pass_manager.py
+++ b/mlir/test/python/pass_manager.py
@@ -435,3 +435,23 @@ def testPrintIrTree():
print_file_tree(temp_dir)
log("// Tree printing end")
+
+
+# CHECK-LABEL: TEST: testEnableStatistics
+@run
+def testEnableStatistics():
+ with Context() as ctx:
+ module = ModuleOp.parse(
+ """
+ module {
+ func.func @main() {
+ %0 = arith.constant 10
+ return
+ }
+ }
+ """
+ )
+ pm = PassManager.parse("builtin.module(canonicalize)")
+ pm.enable_statistics()
+ # CHECK: Pass statistics report
+ pm.run(module)