diff options
Diffstat (limited to 'mlir/test')
-rw-r--r-- | mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir | 54 | ||||
-rw-r--r-- | mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir | 12 | ||||
-rw-r--r-- | mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir | 24 | ||||
-rw-r--r-- | mlir/test/Dialect/ArmSME/vector-legalization.mlir | 11 | ||||
-rw-r--r-- | mlir/test/Dialect/OpenMP/invalid.mlir | 126 | ||||
-rw-r--r-- | mlir/test/Dialect/OpenMP/ops.mlir | 260 | ||||
-rw-r--r-- | mlir/test/Dialect/Vector/canonicalize.mlir | 8 | ||||
-rw-r--r-- | mlir/test/Dialect/Vector/invalid.mlir | 7 | ||||
-rw-r--r-- | mlir/test/Integration/Dialect/Tosa/CPU/test-maxpool-dynamic.mlir | 112 | ||||
-rw-r--r-- | mlir/test/Target/LLVMIR/Import/intrinsic.ll | 2 | ||||
-rw-r--r-- | mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp | 26 | ||||
-rw-r--r-- | mlir/test/lib/Dialect/Test/TestDialect.cpp | 37 | ||||
-rw-r--r-- | mlir/test/lib/Dialect/Test/TestOps.td | 16 |
13 files changed, 525 insertions, 170 deletions
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir index e649036..b404900 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named))" %s -verify-diagnostics -o -| FileCheck %s // RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named{prefer-conv2d-kernel-layout-hwcf=true}))" %s -verify-diagnostics -o -| FileCheck --check-prefix="HWCF" %s +// RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named,cse))" %s -verify-diagnostics -o -| FileCheck --check-prefix="CHECK-CSE" %s // CHECK-LABEL: @matmul func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { @@ -215,6 +216,59 @@ func.func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () { return } +// CHECK-CSE-LABEL: @max_pool_all_dynamic +func.func @max_pool_all_dynamic(%arg0: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> { + // Batch size + // CHECK-CSE: %[[C0:.+]] = arith.constant 0 : index + // CHECK-CSE: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]] : tensor<?x?x?x?xf32> + + // Compute output height + // CHECK-CSE: %[[C1:.+]] = arith.constant 1 : index + // CHECK-CSE: %[[IH:.+]] = tensor.dim %arg0, %[[C1]] : tensor<?x?x?x?xf32> + // CHECK-CSE: %[[C2:.+]] = arith.constant 2 : index + // CHECK-CSE: %[[PADDED_BEFORE:.+]] = arith.addi %[[IH]], %[[C0]] : index + // CHECK-CSE: %[[PADDED_AFTER:.+]] = arith.addi %[[PADDED_BEFORE]], %[[C0]] : index + // CHECK-CSE: %[[SUB_ONE:.+]] = arith.subi %[[C2]], %[[C1]] : index + // CHECK-CSE: %[[DILATED:.+]] = arith.muli %[[C1]], %[[SUB_ONE]] : index + // CHECK-CSE: %[[ADD_ONE:.+]] = arith.addi %[[DILATED]], %[[C1]] : index + // CHECK-CSE: %[[SUBTRACT:.+]] = arith.subi %[[PADDED_AFTER]], %[[ADD_ONE]] 
: index + // CHECK-CSE: %[[DIVIDE:.+]] = arith.divui %[[SUBTRACT]], %[[C1]] : index + // CHECK-CSE: %[[HEIGHT:.+]] = arith.addi %[[DIVIDE]], %[[C1]] : index + + // Compute output width + // CHECK-CSE: %[[IW:.+]] = tensor.dim %arg0, %[[C2]] : tensor<?x?x?x?xf32> + // CHECK-CSE: %[[C5:.+]] = arith.constant 5 : index + // CHECK-CSE: %[[PADDED_BEFORE:.+]] = arith.addi %[[IW]], %[[C2]] : index + // CHECK-CSE: %[[PADDED_AFTER:.+]] = arith.addi %[[PADDED_BEFORE]], %[[C2]] : index + // CHECK-CSE: %[[SUB_ONE:.+]] = arith.subi %[[C5]], %[[C1]] : index + // CHECK-CSE: %[[DILATED:.+]] = arith.muli %[[C1]], %[[SUB_ONE]] : index + // CHECK-CSE: %[[ADD_ONE:.+]] = arith.addi %[[DILATED]], %[[C1]] : index + // CHECK-CSE: %[[SUBTRACT:.+]] = arith.subi %[[PADDED_AFTER]], %[[ADD_ONE]] : index + // CHECK-CSE: %[[DIVIDE:.+]] = arith.divui %[[SUBTRACT]], %[[C1]] : index + // CHECK-CSE: %[[WIDTH:.+]] = arith.addi %[[DIVIDE]], %[[C1]] : index + + // Channel size + // CHECK-CSE: %[[C3:.+]] = arith.constant 3 : index + // CHECK-CSE: %[[CHANNEL:.+]] = tensor.dim %arg0, %[[C3]] : tensor<?x?x?x?xf32> + + // Pad the input + // CHECK-CSE: %[[FLOAT_MIN:.+]] = arith.constant -3.40282347E+38 : f32 + // CHECK-CSE: %[[PADDED:.+]] = tensor.pad %arg0 low[0, 0, 2, 0] high[0, 0, 2, 0] { + // CHECK-CSE: tensor.yield %[[FLOAT_MIN]] : f32 + + // Allocate the output and fill with minimum value + // CHECK-CSE: %[[INIT:.+]] = tensor.empty(%[[BATCH]], %[[HEIGHT]], %[[WIDTH]], %[[CHANNEL]]) : tensor<?x?x?x?xf32> + // CHECK-CSE: %[[FILL:.+]] = linalg.fill ins(%[[FLOAT_MIN]] : f32) outs(%[[INIT]] : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> + // CHECK-CSE: %[[FAKE_WINDOW:.+]] = tensor.empty() : tensor<2x5xf32> + + // Compute max pool + // CHECK-CSE: %[[OUT:.+]] = linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[PADDED]], %[[FAKE_WINDOW]] : tensor<?x?x?x?xf32>, tensor<2x5xf32>) outs(%[[FILL]] : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> + // CHECK-CSE: return 
%[[OUT]] + + %0 = tosa.max_pool2d %arg0 {kernel = array<i64: 2, 5>, pad = array<i64: 0, 0, 2, 2>, stride = array<i64: 1, 1>} : (tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> + return %0 : tensor<?x?x?x?xf32> +} + // ----- // CHECK-LABEL: @avg_pool_f32 diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index 1fa783f..445e8be 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -270,7 +270,8 @@ func.func @test_add_2d_all_dynamic(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32 // CHECK: %[[VAL_0:.*]] = tensor.dim %[[ARG0]], %[[CONST0]] : tensor<?x?xf32> // CHECK: %[[VAL_1:.*]] = arith.cmpi eq, %[[VAL_0]], %[[CONST1]] : index // CHECK: %[[ARG0_DIM0_BROADCAST:.*]] = scf.if %[[VAL_1]] -> (tensor<?x?xf32>) { - // CHECK: %[[VAL_2:.*]] = tensor.dim %[[ARG0]], %[[CONST1]] : tensor<?x?xf32> + // CHECK: %[[LOCAL_CONST1:.*]] = arith.constant 1 : index + // CHECK: %[[VAL_2:.*]] = tensor.dim %[[ARG0]], %[[LOCAL_CONST1]] : tensor<?x?xf32> // CHECK: %[[VAL_3:.*]] = tensor.empty(%[[MAX_DIM0]], %[[VAL_2]]) : tensor<?x?xf32> // CHECK: %[[VAL_4:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<?x?xf32>) outs(%[[VAL_3]] : tensor<?x?xf32>) { // CHECK: ^bb0(%[[VAL_5:.*]]: f32, %[[VAL_6:.*]]: f32): @@ -284,7 +285,8 @@ func.func @test_add_2d_all_dynamic(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32 // CHECK: %[[VAL_7:.*]] = tensor.dim %[[ARG0_DIM0_BROADCAST]], %[[CONST1]] : tensor<?x?xf32> // CHECK: %[[VAL_8:.*]] = arith.cmpi eq, %[[VAL_7]], %[[CONST1]] : index // CHECK: %[[ARG0_DIM1_BROADCAST:.*]] = scf.if %[[VAL_8]] -> (tensor<?x?xf32>) { - // CHECK: %[[VAL_9:.*]] = tensor.dim %[[ARG0_DIM0_BROADCAST]], %[[CONST0]] : tensor<?x?xf32> + // CHECK: %[[LOCAL_CONST0:.*]] = arith.constant 0 : index + // CHECK: %[[VAL_9:.*]] = tensor.dim %[[ARG0_DIM0_BROADCAST]], 
%[[LOCAL_CONST0]] : tensor<?x?xf32> // CHECK: %[[VAL_10:.*]] = tensor.empty(%[[VAL_9]], %[[MAX_DIM1]]) : tensor<?x?xf32> // CHECK: %[[VAL_11:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0_DIM0_BROADCAST]] : tensor<?x?xf32>) outs(%[[VAL_10]] : tensor<?x?xf32>) { // CHECK: ^bb0(%[[VAL_12:.*]]: f32, %[[VAL_13:.*]]: f32): @@ -298,7 +300,8 @@ func.func @test_add_2d_all_dynamic(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32 // CHECK: %[[VAL_14:.*]] = tensor.dim %[[ARG1]], %[[CONST0]] : tensor<?x?xf32> // CHECK: %[[VAL_15:.*]] = arith.cmpi eq, %[[VAL_14]], %[[CONST1]] : index // CHECK: %[[ARG1_DIM0_BROADCAST:.*]] = scf.if %[[VAL_15]] -> (tensor<?x?xf32>) { - // CHECK: %[[VAL_16:.*]] = tensor.dim %[[ARG1]], %[[CONST1]] : tensor<?x?xf32> + // CHECK: %[[LOCAL_CONST1:.*]] = arith.constant 1 : index + // CHECK: %[[VAL_16:.*]] = tensor.dim %[[ARG1]], %[[LOCAL_CONST1]] : tensor<?x?xf32> // CHECK: %[[VAL_17:.*]] = tensor.empty(%[[MAX_DIM0]], %[[VAL_16]]) : tensor<?x?xf32> // CHECK: %[[VAL_18:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG1]] : tensor<?x?xf32>) outs(%[[VAL_17]] : tensor<?x?xf32>) { // CHECK: ^bb0(%[[VAL_19:.*]]: f32, %[[VAL_20:.*]]: f32): @@ -312,7 +315,8 @@ func.func @test_add_2d_all_dynamic(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32 // CHECK: %[[VAL_21:.*]] = tensor.dim %[[ARG1_DIM0_BROADCAST]], %[[CONST1]] : tensor<?x?xf32> // CHECK: %[[VAL_22:.*]] = arith.cmpi eq, %[[VAL_21]], %[[CONST1]] : index // CHECK: %[[ARG1_DIM1_BROADCAST:.*]] = scf.if %[[VAL_22]] -> (tensor<?x?xf32>) { - // CHECK: %[[VAL_23:.*]] = tensor.dim %[[ARG1_DIM0_BROADCAST]], %[[CONST0]] : tensor<?x?xf32> + // CHECK: %[[LOCAL_CONST0:.*]] = arith.constant 0 : index + // CHECK: %[[VAL_23:.*]] = tensor.dim %[[ARG1_DIM0_BROADCAST]], %[[LOCAL_CONST0]] : tensor<?x?xf32> // CHECK: %[[VAL_24:.*]] = tensor.empty(%[[VAL_23]], %[[MAX_DIM1]]) : tensor<?x?xf32> // 
CHECK: %[[VAL_25:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG1_DIM0_BROADCAST]] : tensor<?x?xf32>) outs(%[[VAL_24]] : tensor<?x?xf32>) { // CHECK: ^bb0(%[[VAL_26:.*]]: f32, %[[VAL_27:.*]]: f32): diff --git a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir index 23c6872..935c08a 100644 --- a/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir @@ -131,3 +131,27 @@ func.func @compare_affine_min(%a: index, %b: index) { "test.compare"(%0, %a) {cmp = "LE"} : (index, index) -> () return } + +// ----- + +func.func @compare_const_map() { + %c5 = arith.constant 5 : index + // expected-remark @below{{true}} + "test.compare"(%c5) {cmp = "GT", rhs_map = affine_map<() -> (4)>} + : (index) -> () + // expected-remark @below{{true}} + "test.compare"(%c5) {cmp = "LT", lhs_map = affine_map<() -> (4)>} + : (index) -> () + return +} + +// ----- + +func.func @compare_maps(%a: index, %b: index) { + // expected-remark @below{{true}} + "test.compare"(%a, %b, %b, %a) + {cmp = "GT", lhs_map = affine_map<(d0, d1) -> (1 + d0 + d1)>, + rhs_map = affine_map<(d0, d1) -> (d0 + d1)>} + : (index, index, index, index) -> () + return +} diff --git a/mlir/test/Dialect/ArmSME/vector-legalization.mlir b/mlir/test/Dialect/ArmSME/vector-legalization.mlir index f8be697..f43ef1c 100644 --- a/mlir/test/Dialect/ArmSME/vector-legalization.mlir +++ b/mlir/test/Dialect/ArmSME/vector-legalization.mlir @@ -433,3 +433,14 @@ func.func @lift_illegal_1d_shape_cast_to_memory(%a: index, %b: index, %memref: m %cast = vector.shape_cast %illegalRead : vector<[4]x1xf32> to vector<[4]xf32> return %cast : vector<[4]xf32> } + +// ----- + +// CHECK-LABEL: @multi_tile_splat +func.func @multi_tile_splat() -> vector<[8]x[8]xi32> +{ + // CHECK: %[[SPLAT:.*]] = arith.constant dense<42> : vector<[4]x[4]xi32> + // 
CHECK-NEXT: return %[[SPLAT]], %[[SPLAT]], %[[SPLAT]], %[[SPLAT]] : vector<[4]x[4]xi32>, vector<[4]x[4]xi32>, vector<[4]x[4]xi32>, vector<[4]x[4]xi32> + %0 = arith.constant dense<42> : vector<[8]x[8]xi32> + return %0 : vector<[8]x[8]xi32> +} diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 88dca1b..7f86a7f 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -1580,10 +1580,11 @@ func.func @omp_cancellationpoint2() { func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { %testmemref = "test.memref"() : () -> (memref<i32>) // expected-error @below {{expected equal sizes for allocate and allocator variables}} - "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testmemref) ({ - ^bb0(%arg3: i32, %arg4: i32): - "omp.terminator"() : () -> () - }) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0>} : (i32, i32, i32, i32, i32, i32, memref<i32>) -> () + "omp.taskloop"(%testmemref) ({ + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.yield + } + }) {operandSegmentSizes = array<i32: 0, 0, 0, 0, 0, 1, 0, 0, 0>} : (memref<i32>) -> () return } @@ -1593,10 +1594,11 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { %testf32 = "test.f32"() : () -> (!llvm.ptr) %testf32_2 = "test.f32"() : () -> (!llvm.ptr) // expected-error @below {{expected as many reduction symbol references as reduction variables}} - "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({ - ^bb0(%arg3: i32, %arg4: i32): - "omp.terminator"() : () -> () - }) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0>, reductions = [@add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> () + "omp.taskloop"(%testf32, %testf32_2) ({ + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.yield + } + }) {operandSegmentSizes = array<i32: 0, 0, 0, 2, 0, 0, 0, 0, 0>, reductions = [@add_f32]} : 
(!llvm.ptr, !llvm.ptr) -> () return } @@ -1604,12 +1606,12 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { %testf32 = "test.f32"() : () -> (!llvm.ptr) - %testf32_2 = "test.f32"() : () -> (!llvm.ptr) // expected-error @below {{expected as many reduction symbol references as reduction variables}} - "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32) ({ - ^bb0(%arg3: i32, %arg4: i32): - "omp.terminator"() : () -> () - }) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0>, reductions = [@add_f32, @add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr) -> () + "omp.taskloop"(%testf32) ({ + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.yield + } + }) {operandSegmentSizes = array<i32: 0, 0, 0, 1, 0, 0, 0, 0, 0>, reductions = [@add_f32, @add_f32]} : (!llvm.ptr) -> () return } @@ -1619,10 +1621,11 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { %testf32 = "test.f32"() : () -> (!llvm.ptr) %testf32_2 = "test.f32"() : () -> (!llvm.ptr) // expected-error @below {{expected as many reduction symbol references as reduction variables}} - "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({ - ^bb0(%arg3: i32, %arg4: i32): - "omp.terminator"() : () -> () - }) {in_reductions = [@add_f32], operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> () + "omp.taskloop"(%testf32, %testf32_2) ({ + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.yield + } + }) {in_reductions = [@add_f32], operandSegmentSizes = array<i32: 0, 0, 2, 0, 0, 0, 0, 0, 0>} : (!llvm.ptr, !llvm.ptr) -> () return } @@ -1630,12 +1633,12 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { %testf32 = "test.f32"() : () -> (!llvm.ptr) - %testf32_2 = "test.f32"() : () -> (!llvm.ptr) // expected-error @below 
{{expected as many reduction symbol references as reduction variables}} - "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32_2) ({ - ^bb0(%arg3: i32, %arg4: i32): - "omp.terminator"() : () -> () - }) {in_reductions = [@add_f32, @add_f32], operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr) -> () + "omp.taskloop"(%testf32) ({ + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.yield + } + }) {in_reductions = [@add_f32, @add_f32], operandSegmentSizes = array<i32: 0, 0, 1, 0, 0, 0, 0, 0, 0>} : (!llvm.ptr) -> () return } @@ -1657,9 +1660,10 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { %testf32 = "test.f32"() : () -> (!llvm.ptr) %testf32_2 = "test.f32"() : () -> (!llvm.ptr) // expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}} - omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) nogroup - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - omp.terminator + omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) nogroup { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.yield + } } return } @@ -1681,9 +1685,10 @@ combiner { func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { %testf32 = "test.f32"() : () -> (!llvm.ptr) // expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}} - omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr) in_reduction(@add_f32 -> %testf32 : !llvm.ptr) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - omp.terminator + omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr) in_reduction(@add_f32 -> %testf32 : !llvm.ptr) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.yield + } } return } @@ -1693,8 +1698,20 @@ 
func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { %testi64 = "test.i64"() : () -> (i64) // expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}} - omp.taskloop grain_size(%testi64: i64) num_tasks(%testi64: i64) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.taskloop grain_size(%testi64: i64) num_tasks(%testi64: i64) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.yield + } + } + return +} + +// ----- + +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + // expected-error @below {{op must be a loop wrapper}} + omp.taskloop { + %0 = arith.constant 0 : i32 omp.terminator } return @@ -1702,6 +1719,21 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { // ----- +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + // expected-error @below {{only supported nested wrapper is 'omp.simdloop'}} + omp.taskloop { + omp.distribute { + omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { + omp.yield + } + omp.terminator + } + } + return +} + +// ----- + func.func @omp_threadprivate() { %1 = llvm.mlir.addressof @_QFsubEx : !llvm.ptr // expected-error @below {{op failed to verify that all of {sym_addr, tls_addr} have same type}} @@ -1866,7 +1898,16 @@ func.func @omp_target_depend(%data_var: memref<i32>) { // ----- -func.func @omp_distribute(%data_var : memref<i32>) -> () { +func.func @omp_distribute_schedule(%chunk_size : i32) -> () { + // expected-error @below {{op chunk size set without dist_schedule_static being present}} + "omp.distribute"(%chunk_size) <{operandSegmentSizes = array<i32: 1, 0, 0>}> ({ + "omp.terminator"() : () -> () + }) : (i32) -> () +} + +// ----- + +func.func @omp_distribute_allocate(%data_var : memref<i32>) -> () { // expected-error @below {{expected equal sizes for allocate and allocator variables}} "omp.distribute"(%data_var) 
<{operandSegmentSizes = array<i32: 0, 1, 0>}> ({ "omp.terminator"() : () -> () @@ -1875,6 +1916,29 @@ func.func @omp_distribute(%data_var : memref<i32>) -> () { // ----- +func.func @omp_distribute_wrapper() -> () { + // expected-error @below {{op must be a loop wrapper}} + "omp.distribute"() ({ + %0 = arith.constant 0 : i32 + "omp.terminator"() : () -> () + }) : () -> () +} + +// ----- + +func.func @omp_distribute_nested_wrapper(%data_var : memref<i32>) -> () { + // expected-error @below {{only supported nested wrappers are 'omp.parallel' and 'omp.simdloop'}} + "omp.distribute"() ({ + "omp.wsloop"() ({ + %0 = arith.constant 0 : i32 + "omp.terminator"() : () -> () + }) : () -> () + "omp.terminator"() : () -> () + }) : () -> () +} + +// ----- + omp.private {type = private} @x.privatizer : i32 alloc { ^bb0(%arg0: i32): %0 = arith.constant 0.0 : f32 diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 851d44a..802e179 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -171,6 +171,23 @@ func.func @omp_loop_nest(%lb : index, %ub : index, %step : index) -> () { omp.yield } + // TODO Remove induction variables from omp.wsloop. + omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + // CHECK: omp.loop_nest + // CHECK-SAME: (%{{.*}}) : index = + // CHECK-SAME: (%{{.*}}) to (%{{.*}}) step (%{{.*}}) + "omp.loop_nest" (%lb, %ub, %step) ({ + ^bb0(%iv2: index): + // CHECK: test.op1 + "test.op1"(%lb) : (index) -> () + // CHECK: test.op2 + "test.op2"() : () -> () + // CHECK: omp.yield + omp.yield + }) : (index, index, index) -> () + omp.yield + } + return } @@ -209,6 +226,22 @@ func.func @omp_loop_nest_pretty(%lb : index, %ub : index, %step : index) -> () { omp.yield } + // TODO Remove induction variables from omp.wsloop. 
+ omp.wsloop for (%iv) : index = (%lb) to (%ub) step (%step) { + // CHECK: omp.loop_nest + // CHECK-SAME: (%{{.*}}) : index = + // CHECK-SAME: (%{{.*}}) to (%{{.*}}) step (%{{.*}}) + omp.loop_nest (%iv2) : index = (%lb) to (%ub) step (%step) { + // CHECK: test.op1 + "test.op1"(%lb) : (index) -> () + // CHECK: test.op2 + "test.op2"() : () -> () + // CHECK: omp.yield + omp.yield + } + omp.yield + } + return } @@ -559,30 +592,54 @@ func.func @omp_simdloop_pretty_multiple(%lb1 : index, %ub1 : index, %step1 : ind } // CHECK-LABEL: omp_distribute -func.func @omp_distribute(%chunk_size : i32, %data_var : memref<i32>) -> () { +func.func @omp_distribute(%chunk_size : i32, %data_var : memref<i32>, %arg0 : i32) -> () { // CHECK: omp.distribute "omp.distribute" () ({ - omp.terminator + "omp.loop_nest" (%arg0, %arg0, %arg0) ({ + ^bb0(%iv: i32): + "omp.yield"() : () -> () + }) : (i32, i32, i32) -> () + "omp.terminator"() : () -> () }) {} : () -> () // CHECK: omp.distribute omp.distribute { - omp.terminator + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } } // CHECK: omp.distribute dist_schedule_static omp.distribute dist_schedule_static { - omp.terminator + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } } // CHECK: omp.distribute dist_schedule_static chunk_size(%{{.+}} : i32) omp.distribute dist_schedule_static chunk_size(%chunk_size : i32) { - omp.terminator + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } } // CHECK: omp.distribute order(concurrent) omp.distribute order(concurrent) { - omp.terminator + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } } // CHECK: omp.distribute allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>) omp.distribute allocate(%data_var : memref<i32> -> %data_var : memref<i32>) { - omp.terminator + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } + } + // CHECK: omp.distribute + omp.distribute { + 
// TODO Remove induction variables from omp.simdloop. + omp.simdloop for (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.loop_nest (%iv2) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } + omp.yield + } } return } @@ -2000,135 +2057,128 @@ func.func @omp_taskgroup_clauses() -> () { // CHECK-LABEL: @omp_taskloop func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { - // CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) { - omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) { - // CHECK: omp.terminator - omp.terminator - } - - // CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) { - omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) { - // CHECK: test.op1 - "test.op1"(%lb) : (i32) -> () - // CHECK: test.op2 - "test.op2"() : () -> () - // CHECK: omp.terminator - omp.terminator - } - - // CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator - } - - // CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) inclusive step (%{{.+}}, %{{.+}}) { - omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) inclusive step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop { + omp.taskloop { + omp.loop_nest (%i) : i32 = (%lb) to (%ub) step (%step) { + // CHECK: omp.yield + omp.yield + } } %testbool = "test.bool"() : () -> (i1) - // CHECK: omp.taskloop if(%{{[^)]+}}) - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop if(%testbool) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop if(%{{[^)]+}}) { + omp.taskloop if(%testbool) { + omp.loop_nest (%i, %j) : i32 
= (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } - // CHECK: omp.taskloop final(%{{[^)]+}}) - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop final(%testbool) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop final(%{{[^)]+}}) { + omp.taskloop final(%testbool) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } - // CHECK: omp.taskloop untied - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop untied - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop untied { + omp.taskloop untied { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } - // CHECK: omp.taskloop mergeable - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop mergeable - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop mergeable { + omp.taskloop mergeable { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } %testf32 = "test.f32"() : () -> (!llvm.ptr) %testf32_2 = "test.f32"() : () -> (!llvm.ptr) - // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, 
%step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) { + omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } - // CHECK: omp.taskloop reduction(@add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop reduction(@add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) { + omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } - // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr) reduction(@add_f32 -> %{{.+}} : !llvm.ptr) - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr) reduction(@add_f32 -> %testf32_2 : !llvm.ptr) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr) reduction(@add_f32 -> %{{.+}} : !llvm.ptr) { + omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr) reduction(@add_f32 -> %testf32_2 : !llvm.ptr) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } %testi32 = "test.i32"() : () -> (i32) - // CHECK: omp.taskloop 
priority(%{{[^:]+}}: i32) - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop priority(%testi32: i32) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop priority(%{{[^:]+}}: i32) { + omp.taskloop priority(%testi32: i32) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } %testmemref = "test.memref"() : () -> (memref<i32>) - // CHECK: omp.taskloop allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>) - omp.taskloop allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>) - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop allocate(%{{.+}} : memref<i32> -> %{{.+}} : memref<i32>) { + omp.taskloop allocate(%testmemref : memref<i32> -> %testmemref : memref<i32>) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } %testi64 = "test.i64"() : () -> (i64) - // CHECK: omp.taskloop grain_size(%{{[^:]+}}: i64) - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop grain_size(%testi64: i64) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop grain_size(%{{[^:]+}}: i64) { + omp.taskloop grain_size(%testi64: i64) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } - // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop 
num_tasks(%testi64: i64) - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) { + omp.taskloop num_tasks(%testi64: i64) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } } - // CHECK: omp.taskloop nogroup - // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { - omp.taskloop nogroup - for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { - // CHECK: omp.terminator - omp.terminator + // CHECK: omp.taskloop nogroup { + omp.taskloop nogroup { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } + } + + // CHECK: omp.taskloop { + omp.taskloop { + // TODO Remove induction variables from omp.simdloop. + omp.simdloop for (%iv) : i32 = (%lb) to (%ub) step (%step) { + omp.loop_nest (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.yield + omp.yield + } + // CHECK: omp.yield + omp.yield + } } // CHECK: return diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 627ac54..61a5f2a 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -1943,14 +1943,6 @@ func.func @shuffle_nofold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<5 return %shuffle : vector<5xi32> } -// CHECK-LABEL: func @shuffle_nofold2 -// CHECK: %[[V:.+]] = vector.shuffle %arg0, %arg1 [0, 1, 2, 3] : vector<[4]xi32>, vector<[2]xi32> -// CHECK: return %[[V]] -func.func @shuffle_nofold2(%v0 : vector<[4]xi32>, %v1 : vector<[2]xi32>) -> vector<4xi32> { - %shuffle = vector.shuffle %v0, %v1 [0, 1, 2, 3] : vector<[4]xi32>, vector<[2]xi32> - return %shuffle : vector<4xi32> -} - // ----- // CHECK-LABEL: func @transpose_scalar_broadcast1 diff --git 
a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index c16f1cb..c9f7e9c 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -84,6 +84,13 @@ func.func @shuffle_index_out_of_range(%arg0: vector<2xf32>, %arg1: vector<2xf32> // ----- +func.func @shuffle_scalable_vec(%arg0: vector<[2]xf32>, %arg1: vector<[2]xf32>) { + // expected-error@+1 {{'vector.shuffle' op operand #0 must be fixed-length vector of any type values}} + %1 = vector.shuffle %arg0, %arg1 [0, 1, 2, 3] : vector<[2]xf32>, vector<[2]xf32> +} + +// ----- + func.func @shuffle_empty_mask(%arg0: vector<2xf32>, %arg1: vector<2xf32>) { // expected-error@+1 {{'vector.shuffle' op invalid mask length}} %1 = vector.shuffle %arg0, %arg1 [] : vector<2xf32>, vector<2xf32> diff --git a/mlir/test/Integration/Dialect/Tosa/CPU/test-maxpool-dynamic.mlir b/mlir/test/Integration/Dialect/Tosa/CPU/test-maxpool-dynamic.mlir new file mode 100644 index 0000000..05a78e3 --- /dev/null +++ b/mlir/test/Integration/Dialect/Tosa/CPU/test-maxpool-dynamic.mlir @@ -0,0 +1,112 @@ +// DEFINE: %{tosa-to-linalg-pipeline} = -pass-pipeline="builtin.module(func.func(tosa-infer-shapes,tosa-to-linalg-named,tosa-to-linalg,tosa-to-arith))" + +// RUN: mlir-opt %s \ +// RUN: %{tosa-to-linalg-pipeline} \ +// RUN: | mlir-opt \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -buffer-deallocation-pipeline \ +// RUN: -test-lower-to-llvm \ +// RUN: | mlir-cpu-runner \ +// RUN: -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils \ +// RUN: | FileCheck %s + +// Validate that the TOSA lowering for tosa.max_pool2d produces the same results +// for fully static and fully dynamic inputs. 
+ +!tensor_type = tensor<1x4x4x1xf32> +!memref_type = memref<1x4x4x1xf32> + +// Utility functions +func.func private @printMemrefF32(memref<*xf32>) attributes { llvm.emit_c_interface } + +func.func @max_pool_static(%arg0: !tensor_type) -> (!tensor_type) { + %0 = tosa.max_pool2d %arg0 { + pad = array<i64: 1, 1, 1, 1>, + kernel = array<i64: 3, 3>, + stride = array<i64: 1, 1> + } : (tensor<1x4x4x1xf32>) -> tensor<1x4x4x1xf32> + return %0 : tensor<1x4x4x1xf32> +} + +func.func @max_pool_dynamic(%arg0: tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>) { + %0 = tosa.max_pool2d %arg0 { + pad = array<i64: 1, 1, 1, 1>, + kernel = array<i64: 3, 3>, + stride = array<i64: 1, 1> + } : (tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> + return %0 : tensor<?x?x?x?xf32> +} + +// Test harness to compare the results of a fully statically shaped max_pool2d with +// a fully dynamically shaped max_pool2d on the same inputs. +func.func @main() { + %A = arith.constant dense<[[ + [[0.0], [0.1], [0.2], [0.3]], // H = 0 + [[1.0], [1.1], [1.2], [1.3]], // H = 1 + [[2.0], [2.1], [2.2], [2.3]], // H = 2 + [[3.0], [3.1], [3.2], [3.3]] // H = 3 + ]]> : tensor<1x4x4x1xf32> + + %A_dynamic = tensor.cast %A : !tensor_type to tensor<?x?x?x?xf32> + + // Call both static and dynamically sized variants + %result_static = func.call @max_pool_static(%A) : (!tensor_type) -> !tensor_type + %result_dynamic = func.call @max_pool_dynamic(%A_dynamic) : (tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> + + %static_buffer = bufferization.to_memref %result_static : !memref_type + %unranked_static_buffer = memref.cast %static_buffer : !memref_type to memref<*xf32> + + // CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 4, 4, 1] strides = [16, 4, 1, 1] data = + + // CHECK-NEXT: 1.1 + // CHECK-NEXT: 1.2 + // CHECK-NEXT: 1.3 + // CHECK-NEXT: 1.3 + + // CHECK-NEXT: 2.1 + // CHECK-NEXT: 2.2 + // CHECK-NEXT: 2.3 + // CHECK-NEXT: 2.3 + + // CHECK-NEXT: 3.1 + // CHECK-NEXT: 3.2 + // CHECK-NEXT: 3.3 + // CHECK-NEXT: 
3.3 + + // CHECK-NEXT: 3.1 + // CHECK-NEXT: 3.2 + // CHECK-NEXT: 3.3 + // CHECK-NEXT: 3.3 + + func.call @printMemrefF32(%unranked_static_buffer) : (memref<*xf32>) -> () + + %dynamic_buffer = bufferization.to_memref %result_dynamic : memref<?x?x?x?xf32> + %unranked_dynamic_buffer = memref.cast %dynamic_buffer : memref<?x?x?x?xf32> to memref<*xf32> + + // CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 4, 4, 1] strides = [16, 4, 1, 1] data = + // CHECK-NEXT: 1.1 + // CHECK-NEXT: 1.2 + // CHECK-NEXT: 1.3 + // CHECK-NEXT: 1.3 + + // CHECK-NEXT: 2.1 + // CHECK-NEXT: 2.2 + // CHECK-NEXT: 2.3 + // CHECK-NEXT: 2.3 + + // CHECK-NEXT: 3.1 + // CHECK-NEXT: 3.2 + // CHECK-NEXT: 3.3 + // CHECK-NEXT: 3.3 + + // CHECK-NEXT: 3.1 + // CHECK-NEXT: 3.2 + // CHECK-NEXT: 3.3 + // CHECK-NEXT: 3.3 + + func.call @printMemrefF32(%unranked_dynamic_buffer) : (memref<*xf32>) -> () + + return +} + diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll index 81a6eadba..bf6847a 100644 --- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -597,7 +597,7 @@ define void @ushl_sat_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { } ; CHECK-LABEL: llvm.func @va_intrinsics_test -define void @va_intrinsics_test(ptr %0, ptr %1) { +define void @va_intrinsics_test(ptr %0, ptr %1, ...) 
{ ; CHECK: llvm.intr.vastart %{{.*}} call void @llvm.va_start.p0(ptr %0) ; CHECK: llvm.intr.vacopy %{{.*}} to %{{.*}} diff --git a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp index 6730f9b..b098a5a 100644 --- a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp +++ b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp @@ -109,7 +109,7 @@ static LogicalResult testReifyValueBounds(func::FuncOp funcOp, FailureOr<OpFoldResult> reified = failure(); if (constant) { auto reifiedConst = ValueBoundsConstraintSet::computeConstantBound( - boundType, value, dim, /*stopCondition=*/nullptr); + boundType, {value, dim}, /*stopCondition=*/nullptr); if (succeeded(reifiedConst)) reified = FailureOr<OpFoldResult>(rewriter.getIndexAttr(*reifiedConst)); } else if (scalable) { @@ -128,22 +128,12 @@ static LogicalResult testReifyValueBounds(func::FuncOp funcOp, rewriter, loc, reifiedScalable->map, vscaleOperand); } } else { - if (dim) { - if (useArithOps) { - reified = arith::reifyShapedValueDimBound( - rewriter, op->getLoc(), boundType, value, *dim, stopCondition); - } else { - reified = reifyShapedValueDimBound(rewriter, op->getLoc(), boundType, - value, *dim, stopCondition); - } + if (useArithOps) { + reified = arith::reifyValueBound(rewriter, op->getLoc(), boundType, + op.getVariable(), stopCondition); } else { - if (useArithOps) { - reified = arith::reifyIndexValueBound( - rewriter, op->getLoc(), boundType, value, stopCondition); - } else { - reified = reifyIndexValueBound(rewriter, op->getLoc(), boundType, - value, stopCondition); - } + reified = reifyValueBound(rewriter, op->getLoc(), boundType, + op.getVariable(), stopCondition); } } if (failed(reified)) { @@ -188,9 +178,7 @@ static LogicalResult testEquality(func::FuncOp funcOp) { } auto compare = [&](ValueBoundsConstraintSet::ComparisonOperator cmp) { - return ValueBoundsConstraintSet::compare( - /*lhs=*/op.getLhs(), /*lhsDim=*/std::nullopt, cmp, - 
/*rhs=*/op.getRhs(), /*rhsDim=*/std::nullopt); + return ValueBoundsConstraintSet::compare(op.getLhs(), cmp, op.getRhs()); }; if (compare(cmpType)) { op->emitRemark("true"); diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp index 25c5190..a23ed89 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -549,6 +549,12 @@ LogicalResult ReifyBoundOp::verify() { return success(); } +::mlir::ValueBoundsConstraintSet::Variable ReifyBoundOp::getVariable() { + if (getDim().has_value()) + return ValueBoundsConstraintSet::Variable(getVar(), *getDim()); + return ValueBoundsConstraintSet::Variable(getVar()); +} + ::mlir::ValueBoundsConstraintSet::ComparisonOperator CompareOp::getComparisonOperator() { if (getCmp() == "EQ") @@ -564,6 +570,37 @@ CompareOp::getComparisonOperator() { llvm_unreachable("invalid comparison operator"); } +::mlir::ValueBoundsConstraintSet::Variable CompareOp::getLhs() { + if (!getLhsMap()) + return ValueBoundsConstraintSet::Variable(getVarOperands()[0]); + SmallVector<Value> mapOperands( + getVarOperands().slice(0, getLhsMap()->getNumInputs())); + return ValueBoundsConstraintSet::Variable(*getLhsMap(), mapOperands); +} + +::mlir::ValueBoundsConstraintSet::Variable CompareOp::getRhs() { + int64_t rhsOperandsBegin = getLhsMap() ? getLhsMap()->getNumInputs() : 1; + if (!getRhsMap()) + return ValueBoundsConstraintSet::Variable( + getVarOperands()[rhsOperandsBegin]); + SmallVector<Value> mapOperands( + getVarOperands().slice(rhsOperandsBegin, getRhsMap()->getNumInputs())); + return ValueBoundsConstraintSet::Variable(*getRhsMap(), mapOperands); +} + +LogicalResult CompareOp::verify() { + if (getCompose() && (getLhsMap() || getRhsMap())) + return emitOpError( + "'compose' not supported when 'lhs_map' or 'rhs_map' is present"); + int64_t expectedNumOperands = getLhsMap() ? getLhsMap()->getNumInputs() : 1; + expectedNumOperands += getRhsMap() ? 
getRhsMap()->getNumInputs() : 1; + if (getVarOperands().size() != size_t(expectedNumOperands)) + return emitOpError("expected ") + << expectedNumOperands << " operands, but got " + << getVarOperands().size(); + return success(); +} + //===----------------------------------------------------------------------===// // Test removing op with inner ops. //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index ebf158b..b641b3d 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -2207,6 +2207,7 @@ def ReifyBoundOp : TEST_Op<"reify_bound", [Pure]> { let extraClassDeclaration = [{ ::mlir::presburger::BoundType getBoundType(); + ::mlir::ValueBoundsConstraintSet::Variable getVariable(); }]; let hasVerifier = 1; @@ -2217,18 +2218,29 @@ def CompareOp : TEST_Op<"compare"> { Compare `lhs` and `rhs`. A remark is emitted which indicates whether the specified comparison operator was proven to hold. The remark also indicates whether the opposite comparison operator was proven to hold. + + `var_operands` must have exactly two operands: one for the LHS operand and + one for the RHS operand. If `lhs_map` is specified, as many operands as + `lhs_map` has inputs are expected instead of the first operand. If `rhs_map` + is specified, as many operands as `rhs_map` has inputs are expected instead + of the second operand. 
}]; - let arguments = (ins Index:$lhs, - Index:$rhs, + let arguments = (ins Variadic<Index>:$var_operands, DefaultValuedAttr<StrAttr, "\"EQ\"">:$cmp, + OptionalAttr<AffineMapAttr>:$lhs_map, + OptionalAttr<AffineMapAttr>:$rhs_map, UnitAttr:$compose); let results = (outs); let extraClassDeclaration = [{ ::mlir::ValueBoundsConstraintSet::ComparisonOperator getComparisonOperator(); + ::mlir::ValueBoundsConstraintSet::Variable getLhs(); + ::mlir::ValueBoundsConstraintSet::Variable getRhs(); }]; + + let hasVerifier = 1; } //===----------------------------------------------------------------------===// |