diff options
author | Andrzej Warzyński <andrzej.warzynski@arm.com> | 2025-05-23 14:08:31 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-23 14:08:31 +0100 |
commit | 6e98c8cb2749b031eb03d73e652d0f897085e150 (patch) | |
tree | 8e031f5bc6f1a3f7c412fbe9f18882db4327bc85 | |
parent | 16fdb4f62d60df697ec69bcdc0e116e58c180682 (diff) | |
download | llvm-6e98c8cb2749b031eb03d73e652d0f897085e150.zip llvm-6e98c8cb2749b031eb03d73e652d0f897085e150.tar.gz llvm-6e98c8cb2749b031eb03d73e652d0f897085e150.tar.bz2 |
[mlir][linalg] Move vectorization tests for Tensor Ops (nfc) (#140877)
This patch reorganises vectorisation tests for tensor ops:
* Tests for `tensor.pad` and `tensor.insert_slice` are extracted into
dedicated files under a new `vectorization/` subdirectory.
* Test files for `tensor.extract` are renamed and moved to the same
subdirectory.
Goals:
* Unify test file naming.
* Better organise the growing set of tests, which are currently hard to
navigate.
This is also a preparatory step for upcoming changes. I’ll soon be updating the
vectorisation logic for `tensor.pad` and `tensor.insert_slice`. With the new
structure in place, follow-up changes will be easier to review:
* Only tests related to those ops will be updated.
* Changes (e.g., to masking logic) will be isolated to the relevant tests.
This patch implements part of #141025 - please see the ticket for full context.
-rw-r--r-- | mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir | 315 | ||||
-rw-r--r-- | mlir/test/Dialect/Linalg/vectorization.mlir | 277 | ||||
-rw-r--r-- | mlir/test/Dialect/Linalg/vectorization/extract-with-patterns.mlir (renamed from mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir) | 2 | ||||
-rw-r--r-- | mlir/test/Dialect/Linalg/vectorization/extract.mlir (renamed from mlir/test/Dialect/Linalg/vectorize-tensor-extract-masked.mlir) | 0 | ||||
-rw-r--r-- | mlir/test/Dialect/Linalg/vectorization/insert-slice-with-patterns.mlir | 90 | ||||
-rw-r--r-- | mlir/test/Dialect/Linalg/vectorization/insert-slice.mlir | 150 | ||||
-rw-r--r-- | mlir/test/Dialect/Linalg/vectorization/pad-with-patterns.mlir | 227 | ||||
-rw-r--r-- | mlir/test/Dialect/Linalg/vectorization/pad.mlir | 131 |
8 files changed, 599 insertions, 593 deletions
diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir index 9f2ee47..b282c57 100644 --- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir @@ -889,207 +889,6 @@ module attributes {transform.with_named_sequence} { // ----- -// CHECK-LABEL: func @pad_static( -// CHECK-SAME: %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: tensor.pad -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[INIT:.*]] = tensor.empty() : tensor<2x3x4xf32> -// CHECK-DAG: %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x3x4xf32> -// CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]]{{.*}} : vector<2x3x4xf32>, tensor<2x3x4xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32> -// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32> -// CHECK: return %[[RESULT]] -func.func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> { - %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { - ^bb0(%arg1: index, %arg2: index, %arg3: index): - tensor.yield %pad_value : f32 - } : tensor<2x?x2xf32> to tensor<2x3x4xf32> - return %0 : tensor<2x3x4xf32> -} - - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: func @pad_static_source( -// CHECK-SAME: %[[ARG0:.*]]: tensor<2x5x2xf32>, %[[PAD:.*]]: f32 -// CHECK-NOT: tensor.pad -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<2x6x4xf32> -// CHECK: %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x6x4xf32> -// CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<2x6x4xf32>, tensor<2x6x4xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : tensor<2x5x2xf32>, vector<2x5x2xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x5x2xf32>, tensor<2x6x4xf32> -// CHECK: return %[[WRITE]] -func.func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6x4xf32> { - %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { - ^bb0(%arg1: index, %arg2: index, %arg3: index): - tensor.yield %pad_value : f32 - } : tensor<2x5x2xf32> to tensor<2x6x4xf32> - return %0 : tensor<2x6x4xf32> -} - - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = 
transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - - -// ----- - -// CHECK-LABEL: func @pad_static_dynamic( -// CHECK-SAME: %[[SRC:.*]]: tensor<1x2x2x?xf32>, %[[LOW:.*]]: index, %[[HIGH:.*]]: index -// CHECK-NOT: tensor.pad -// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index -// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index -// CHECK: %[[V0:.*]] = arith.addi %[[LOW]], %[[C2]] : index -// CHECK: %[[V1:.*]] = arith.addi %[[V0]], %[[C3]] : index -// CHECK: %[[V2:.*]] = arith.addi %[[HIGH]], %[[C5]] : index -// CHECK: %[[DIM3:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32> -// CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index -// CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index -// CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32> -// CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32> -// CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32> -// CHECK: return %[[RESULT]] -func.func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index, - %pad_value: f32) -> tensor<6x?x?x?xf32> { - %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - tensor.yield %pad_value : f32 - } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> - return %0 : tensor<6x?x?x?xf32> -} - - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: func @pad_static_complex( -// CHECK-NOT: vector< -func.func @pad_static_complex(%arg0: tensor<2x5x2xcomplex<f32>>, %pad_value: complex<f32>) -> tensor<2x6x4xcomplex<f32>> { - %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { - ^bb0(%arg1: index, %arg2: index, %arg3: index): - tensor.yield %pad_value : complex<f32> - } : tensor<2x5x2xcomplex<f32>> to tensor<2x6x4xcomplex<f32>> - return %0 : tensor<2x6x4xcomplex<f32>> -} - - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -func.func private @make_vector() -> tensor<12x13xf32> - -// CHECK-LABEL: func.func @pad_and_insert_slice_dest( -// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { -// CHECK: %[[C0:.*]] = 
arith.constant 0.000000e+00 : f32 -// CHECK: %[[CST:.*]] = arith.constant dense<5.000000e+00> : vector<1x12x13xf32> -// CHECK: %[[C0_IDX:.*]] = arith.constant 0 : index -// CHECK: %[[PAD_VAL:.*]] = arith.constant 5.000000e+00 : f32 -// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32> -// CHECK: %[[WRITE_1:.*]] = vector.transfer_write %[[CST]], %[[EMPTY]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true, true]} : vector<1x12x13xf32>, tensor<1x12x13xf32> -// CHECK: %[[READ_1:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]], %[[PAD_VAL]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32> -// CHECK: %[[WRITE_2:.*]] = vector.transfer_write %[[READ_1]], %[[WRITE_1]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32> -// CHECK: %[[MAKE_VEC:.*]] = call @make_vector() : () -> tensor<12x13xf32> -// CHECK: %[[READ_2:.*]] = vector.transfer_read %[[MAKE_VEC]]{{\[}}%[[C0_IDX]], %[[C0_IDX]]], %[[C0]] {in_bounds = [true, true]} : tensor<12x13xf32>, vector<12x13xf32> -// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ_2]], %[[WRITE_2]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true]} : vector<12x13xf32>, tensor<1x12x13xf32> -// CHECK: return %[[RES]] : tensor<1x12x13xf32> -func.func @pad_and_insert_slice_dest( - %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { - %c5 = arith.constant 5.0 : f32 - %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 7] { - ^bb0(%arg2: index, %arg3: index, %arg4: index): - tensor.yield %c5 : f32 - } : tensor<1x5x6xf32> to tensor<1x12x13xf32> - %1 = call @make_vector() : () -> tensor<12x13xf32> - %r = tensor.insert_slice %1 into %0[0, 0, 0][1, 12, 13][1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32> - return %r : tensor<1x12x13xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: func @pad_tensor_non_const_pad_value -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: tensor.pad -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index -// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index -// CHECK: %[[FILL:.*]] = tensor.generate -// CHECK: %[[RES:.*]] = arith.mulf -// CHECK: tensor.yield %[[RES]] : f32 -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true]} : tensor<5x6xf32>, vector<5x6xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C3]], %[[C4]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<12x13xf32> -// CHECK: return %[[WRITE]] -func.func @pad_tensor_non_const_pad_value(%arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5.0 : f32 - %0 = tensor.pad %arg0 low[3, 4] high[4, 3] { - ^bb0(%arg1: index, %arg2: index): - %i1 = arith.index_cast %arg1 : index to i32 - %i2 = arith.index_cast %arg2 : index to i32 - %f1 = arith.sitofp %i1 : i32 to f32 - %f2 = arith.sitofp %i2 : i32 to f32 - %m = arith.mulf %f1, %f2 : f32 - 
tensor.yield %m : f32 - } : tensor<5x6xf32> to tensor<12x13xf32> - return %0 : tensor<12x13xf32> -} - - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - // CHECK-LABEL: func @sum_exp func.func @sum_exp(%input: tensor<4x16x8xf32>, %output: tensor<4x16xf32>) -> tensor<4x16xf32> @@ -1805,29 +1604,6 @@ module attributes {transform.with_named_sequence} { // ----- -// CHECK-LABEL: func @test_masked_pad_static_dynamic -func.func @test_masked_pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index, - %pad_value: f32) -> tensor<6x?x?x?xf32> { - // CHECK: tensor.pad - %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] { - ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): - tensor.yield %pad_value : f32 - } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32> - return %0 : tensor<6x?x?x?xf32> -} - - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - func.func @zero_dim_tensor(%input: tensor<f32>, %output: tensor<f32>) -> tensor<f32> { %0 = linalg.generic { indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ], @@ -2001,94 +1777,3 @@ module attributes {transform.with_named_sequence} { transform.yield } } - -// ----- - -///---------------------------------------------------------------------------------------- -/// tensor.insert_slice -///---------------------------------------------------------------------------------------- - -// The pad value for xfer-read is neither needed nor available - use the default (0.0). 
- -// CHECK-LABEL: func @insert_static_slice_default_pad -// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x2x3xf32>, -// CHECK-SAME: %[[ARG_1:.*]]: tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> { -// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x2x3xf32>, vector<1x2x3xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[ARG_1]]{{\[}}%[[C0]], %[[C0]], %[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32> -// CHECK: return %[[WRITE]] : tensor<9x8x7x1x2x3xf32> -func.func @insert_static_slice_default_pad(%arg0: tensor<1x2x3xf32>, %arg1: tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> { - %res = tensor.insert_slice %arg0 into %arg1[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 2, 3][1, 1, 1, 1, 1, 1] : tensor<1x2x3xf32> into tensor<9x8x7x1x2x3xf32> - return %res : tensor<9x8x7x1x2x3xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// Same as above, but there's a pad value available that should be used instead of the default value. - -// CHECK-LABEL: func.func @insert_static_slice_non_zero_pad -// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x2x3xf32>, -// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<9x8x7x1x2x3xf32> { -// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32> -// CHECK: %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x2x3xf32>, vector<1x2x3xf32> -// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32> -// CHECK: return %[[RES]] : tensor<9x8x7x1x2x3xf32> -func.func @insert_static_slice_non_zero_pad(%arg0: tensor<1x2x3xf32>, %pad : f32) -> tensor<9x8x7x1x2x3xf32> { - %init = tensor.empty() : tensor<9x8x7x1x2x3xf32> - %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> - %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 2, 3][1, 1, 1, 1, 1, 1] : tensor<1x2x3xf32> into tensor<9x8x7x1x2x3xf32> - return %res : tensor<9x8x7x1x2x3xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// Same as above, but the 
source type has is dynamically shaped. This means -// that the pad value is now required and the vector dim corresponding to the -// dynamic shape has to be inferred from the shape of the destination tensor. - -// CHECK-LABEL: func.func @insert_dynamic_slice_non_zero_pad( -// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x?x3xf32>, -// CHECK-SAME: %[[PAD:.*]]: f32, -// CHECK-SAME: %[[SIZE:.*]]: index) -> tensor<9x8x7x1x2x3xf32> { -// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32> -// CHECK: %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, false, true]} : tensor<1x?x3xf32>, vector<1x2x3xf32> -// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32> -// CHECK: return %[[RES]] : tensor<9x8x7x1x2x3xf32> -func.func @insert_dynamic_slice_non_zero_pad(%arg0: tensor<1x?x3xf32>, %pad : f32, %size: index) -> tensor<9x8x7x1x2x3xf32> { - %init = tensor.empty() : tensor<9x8x7x1x2x3xf32> - %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> - %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, %size, 3][1, 1, 1, 1, 1, 1] : tensor<1x?x3xf32> into tensor<9x8x7x1x2x3xf32> - return %res : tensor<9x8x7x1x2x3xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 6b760a1..8c6760f 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -580,133 +580,6 @@ module attributes {transform.with_named_sequence} { } } -// ----- - -// CHECK-LABEL: func @test_masked_vectorize_pad -func.func @test_masked_vectorize_pad( - %0 : tensor<?x?xf32>, %h0 : index, %h1 : index) - -> tensor<2x4xf32> -{ - // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32 - // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index - // CHECK-DAG: %[[c0_0:.*]] = arith.constant 0 : index - // CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32> - // CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32> - // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1> - // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] { - // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_0]], %[[c0_0]]], %[[c42]] - // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32> - // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32> - // CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index - // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4xf32> - // CHECK: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_1]], %[[c0_1]]] - // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<2x4xf32> - %cst = arith.constant 42.43 : f32 - %c0 = 
arith.constant 0 : index - %1 = tensor.pad %0 low[0, %c0] high[%h0, %h1] { - ^bb0(%hh1: index, %hh2: index): - tensor.yield %cst : f32 - } : tensor<?x?xf32> to tensor<2x4xf32> - return %1: tensor<2x4xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 - : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> -// CHECK: func @test_masked_vectorize_dynamic_pad -func.func @test_masked_vectorize_dynamic_pad( - %0 : tensor<?x?xf32>, %h0 : index, %h1 : index) - -> tensor<?x?xf32> -{ - // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32 - // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index - // CHECK-DAG: %[[res_d0:.+]] = affine.apply #[[MAP]]() - // CHECK-DAG: %[[res_d1:.+]] = affine.apply #[[MAP]]() - // CHECK: %[[c0_2:.*]] = arith.constant 0 : index - // CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32> - // CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32> - // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1> - // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] { - // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_2]], %[[c0_2]]], %[[c42]] - // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32> - // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32> - // CHECK-DAG: %[[empty:.*]] = tensor.empty(%[[res_d0]], %[[res_d1]]) : tensor<?x?xf32> - // CHECK-DAG: %[[c0_3:.*]] = arith.constant 0 : index - // CHECK-DAG: %[[d2:.*]] = tensor.dim %[[empty]], {{.*}} : tensor<?x?xf32> - // CHECK-DAG: %[[d3:.*]] = tensor.dim %[[empty]], {{.*}} : tensor<?x?xf32> - // CHECK: %[[mask_2:.*]] = vector.create_mask %[[d2]], %[[d3]] : vector<2x4xi1> - // CHECK: %[[masked_write:.*]] = vector.mask %[[mask_2]] { - // CHECK-SAME: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_3]], %[[c0_3]]] - // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<?x?xf32> - // CHECK: return %[[masked_write]] : tensor<?x?xf32> - %cst = arith.constant 42.43 : f32 - %c0 = arith.constant 0 : index - %1 = tensor.pad %0 low[0, %c0] high[%h0, %h1] { - ^bb0(%hh1: index, %hh2: index): - tensor.yield %cst : f32 - } : tensor<?x?xf32> to tensor<?x?xf32> - return %1: tensor<?x?xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 - : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op - transform.yield - } -} - -// ----- -// This case is supported because low padding `%l0` is applied on -// a unit dimension which is supported, non unit result dimension low -// padding is currently unsupported. 
-// CHECK-LABEL: func @test_masked_vectorize_non_zero_low_pad_unit_res_dim -func.func @test_masked_vectorize_non_zero_low_pad_unit_res_dim( - %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index) - -> tensor<1x4xf32> -{ - // CHECK-DAG: %[[C42:.*]] = arith.constant 4.243000e+01 : f32 - // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index - // CHECK: %[[C0_1:.*]] = arith.constant 0 : index - // CHECK-DAG: %[[D0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32> - // CHECK-DAG: %[[D1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32> - // CHECK: %[[MASK:.*]] = vector.create_mask %[[D0]], %[[D1]] : vector<1x4xi1> - // CHECK: %[[MASKED_READ:.*]] = vector.mask %[[MASK]] { - // CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[C42]] - // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32> - // CHECK-SAME: } : vector<1x4xi1> -> vector<1x4xf32> - // CHECK-DAG: %[[EMPTY:.*]] = tensor.empty() : tensor<1x4xf32> - // CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index - // CHECK: %[[MASKED_WRITE:.*]] = vector.transfer_write %[[MASKED_READ]], %[[EMPTY]][%[[C0_2]], %[[C0_2]]] - // CHECK-SAME: {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32> - // CHECK: return %[[MASKED_WRITE]] : tensor<1x4xf32> - %cst = arith.constant 42.43 : f32 - %c0 = arith.constant 0 : index - %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] { - ^bb0(%hh1: index, %hh2: index): - tensor.yield %cst : f32 - } : tensor<?x?xf32> to tensor<1x4xf32> - return %1: tensor<1x4xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 - : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [1, 4] : !transform.any_op - transform.yield - } -} // ----- @@ -1155,153 +1028,3 @@ func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf } } -// ----- - -///---------------------------------------------------------------------------------------- -/// tensor.insert_slice -///---------------------------------------------------------------------------------------- - -func.func private @insert_slice_static_sizes(%source: tensor<?x3x?x1xi32>) -> tensor<5x3xi32> { - %c2 = arith.constant 2 : index - %init = tensor.empty() : tensor<5x3xi32> - - %source_slice = tensor.extract_slice %source[0, %c2, 0, 0] [1, 1, 5, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<5x1xi32> - %res = tensor.insert_slice %source_slice into %init[0, %c2] [5, 1] [1, 1] : tensor<5x1xi32> into tensor<5x3xi32> - - return %res : tensor<5x3xi32> -} - -// CHECK-LABEL: func.func private @insert_slice_static_sizes( -// CHECK-SAME: %[[SEC:.*]]: tensor<?x3x?x1xi32>) -> tensor<5x3xi32> { -// CHECK: %[[C_2:.*]] = arith.constant 2 : index -// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<5x3xi32> -// CHECK: %[[SRC_SLICE:.*]] = tensor.extract_slice %[[SEC]][0, %[[C_2]], 0, 0] [1, 1, 5, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<5x1xi32> -// CHECK-DAG: %[[PAD:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[C_5:.*]] = arith.constant 5 : index -// CHECK-DAG: %[[C_1:.*]] = arith.constant 1 : index -// CHECK: %[[MASK:.*]] = vector.create_mask %[[C_5]], %[[C_1]] : vector<8x1xi1> -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC_SLICE]][%[[C0]], %[[C0]]], %[[PAD]] : tensor<5x1xi32>, vector<8x1xi32> } : vector<8x1xi1> -> vector<8x1xi32> -// CHECK: %[[C_0:.*]] = 
arith.constant 0 : index -// CHECK: %[[RES:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[INIT]][%[[C_0]], %[[C_2]]] : vector<8x1xi32>, tensor<5x3xi32> } : vector<8x1xi1> -> tensor<5x3xi32> -// CHECK: return %[[RES]] : tensor<5x3xi32> - - module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [8, 1] : !transform.any_op - transform.yield - } - } - -// ----- - -// One of the _source_ dimensions is dynamic (but _destination_ dimensions are static). - -func.func private @insert_slice_dynamic_src_dim(%source: tensor<?x3x?x1xi32>, %size: index) -> tensor<5x3xi32> { - %c2 = arith.constant 2 : index - %init = tensor.empty() : tensor<5x3xi32> - - %source_slice = tensor.extract_slice %source[0, %c2, 0, 0] [1, 1, %size, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<?x1xi32> - %res = tensor.insert_slice %source_slice into %init[0, %c2] [%size, 1] [1, 1] : tensor<?x1xi32> into tensor<5x3xi32> - - return %res : tensor<5x3xi32> -} - -// CHECK-LABEL: func.func private @insert_slice_dynamic_src_dim( -// CHECK-SAME: %[[SRC:.*]]: tensor<?x3x?x1xi32>, -// CHECK-SAME: %[[SIZE:.*]]: index) -> tensor<5x3xi32> { -// CHECK: %[[C_2:.*]] = arith.constant 2 : index -// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<5x3xi32> -// CHECK: %[[SRC_SLICE:.*]] = tensor.extract_slice %[[SRC]][0, %[[C_2]], 0, 0] [1, 1, %[[SIZE]], 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<?x1xi32> -// CHECK-DAG: %[[PAD:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[C_1:.*]] = arith.constant 1 : index -// CHECK: %[[MASK:.*]] = vector.create_mask %[[SIZE]], %[[C_1]] : vector<8x1xi1> -// CHECK: %[[C_0:.*]] = arith.constant 0 : index -// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC_SLICE]][%[[C_0]], %[[C_0]]], %[[PAD]] : tensor<?x1xi32>, vector<8x1xi32> } : vector<8x1xi1> -> vector<8x1xi32> -// CHECK: %[[C_0_1:.*]] = arith.constant 0 : index -// CHECK: %[[RES:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[INIT]][%[[C_0_1]], %[[C_2]]] : vector<8x1xi32>, tensor<5x3xi32> } : vector<8x1xi1> -> tensor<5x3xi32> -// CHECK: return %[[RES]] : tensor<5x3xi32> - - module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [8, 1] : !transform.any_op - transform.yield - } - } - -// ----- - -// One of the _destination_ dimensions is dynamic (but _source_ dimensions are static). 
- -func.func private @insert_slice_dynamic_dest_dim(%source: tensor<?x3x?x1xi32>, %size: index) -> tensor<?x3xi32> { - %c2 = arith.constant 2 : index - %init = tensor.empty(%size) : tensor<?x3xi32> - - %source_slice = tensor.extract_slice %source[0, %c2, 0, 0] [1, 1, 5, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<5x1xi32> - %res = tensor.insert_slice %source_slice into %init[0, %c2] [5, 1] [1, 1] : tensor<5x1xi32> into tensor<?x3xi32> - - return %res : tensor<?x3xi32> -} - -// CHECK-LABEL: func.func private @insert_slice_dynamic_dest_dim( -// CHECK-SAME: %[[SRC:.*]]: tensor<?x3x?x1xi32>, -// CHECK-SAME: %[[SIZE:.*]]: index) -> tensor<?x3xi32> { -// CHECK: %[[C_2:.*]] = arith.constant 2 : index -// CHECK: %[[INIT:.*]] = tensor.empty(%[[SIZE]]) : tensor<?x3xi32> -// CHECK: %[[SRC_SLICE:.*]] = tensor.extract_slice %[[SRC]][0, %[[C_2]], 0, 0] [1, 1, 5, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<5x1xi32> -// CHECK: %[[PAD:.*]] = arith.constant 0 : i32 -// CHECK: %[[C_5:.*]] = arith.constant 5 : index -// CHECK: %[[C_1:.*]] = arith.constant 1 : index -// CHECK: %[[MASK:.*]] = vector.create_mask %[[C_5]], %[[C_1]] : vector<8x1xi1> -// CHECK: %[[C_0:.*]] = arith.constant 0 : index -// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC_SLICE]][%[[C_0]], %[[C_0]]], %[[PAD]] : tensor<5x1xi32>, vector<8x1xi32> } : vector<8x1xi1> -> vector<8x1xi32> -// CHECK: %[[C_0_1:.*]] = arith.constant 0 : index -// CHECK: %[[WRITE:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[INIT]][%[[C_0_1]], %[[C_2]]] : vector<8x1xi32>, tensor<?x3xi32> } : vector<8x1xi1> -> tensor<?x3xi32> -// CHECK: return %[[WRITE]] : tensor<?x3xi32> - - module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [8, 1] : !transform.any_op - transform.yield - } - } - -// ----- - -// At least one _source_ and one _destination_ dimensions are dynamic. 
- -func.func private @insert_slice_dynamic_source_and_dest_dim(%source: tensor<?x3x?x1xi32>, %size: index) -> tensor<?x3xi32> { - %c2 = arith.constant 2 : index - %init = tensor.empty(%size) : tensor<?x3xi32> - - %source_slice = tensor.extract_slice %source[0, %c2, 0, 0] [1, 1, %size, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<?x1xi32> - %res = tensor.insert_slice %source_slice into %init[0, %c2] [%size, 1] [1, 1] : tensor<?x1xi32> into tensor<?x3xi32> - - return %res : tensor<?x3xi32> -} - -// CHECK-LABEL: func.func private @insert_slice_dynamic_source_and_dest_dim( -// CHECK-SAME: %[[SRC:.*]]: tensor<?x3x?x1xi32>, -// CHECK-SAME: %[[SIZE:.*]]: index) -> tensor<?x3xi32> { -// CHECK: %[[C_2:.*]] = arith.constant 2 : index -// CHECK: %[[INIT:.*]] = tensor.empty(%[[SIZE]]) : tensor<?x3xi32> -// CHECK: %[[SRC_SIZE:.*]] = tensor.extract_slice %[[SRC]][0, %[[C_2]], 0, 0] [1, 1, %[[SIZE]], 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<?x1xi32> -// CHECK: %[[PAD:.*]] = arith.constant 0 : i32 -// CHECK: %[[C1:.*]] = arith.constant 1 : index -// CHECK: %[[MASK:.*]] = vector.create_mask %[[SIZE]], %[[C1]] : vector<8x1xi1> -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC_SIZE]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] : tensor<?x1xi32>, vector<8x1xi32> } : vector<8x1xi1> -> vector<8x1xi32> -// CHECK: %[[C_0_1:.*]] = arith.constant 0 : index -// CHECK: %[[WRITE:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[INIT]]{{\[}}%[[C_0_1]], %[[C_2]]] : vector<8x1xi32>, tensor<?x3xi32> } : vector<8x1xi1> -> tensor<?x3xi32> -// CHECK: return %[[WRITE]] : tensor<?x3xi32> - - module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op - transform.structured.vectorize %0 vector_sizes [8, 1] : !transform.any_op - transform.yield - } - } diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir b/mlir/test/Dialect/Linalg/vectorization/extract-with-patterns.mlir index 01eafaf..f62e257 100644 --- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/extract-with-patterns.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt -split-input-file \ -// RUN: -transform-preload-library='transform-library-paths=%p/td/vectorize-with-patterns.mlir' \ +// RUN: -transform-preload-library='transform-library-paths=%p/../td/vectorize-with-patterns.mlir' \ // RUN: -transform-interpreter=entry-point=vectorize_with_patterns %s | FileCheck %s //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract-masked.mlir b/mlir/test/Dialect/Linalg/vectorization/extract.mlir index d0d3b58..d0d3b58 100644 --- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract-masked.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/extract.mlir diff --git a/mlir/test/Dialect/Linalg/vectorization/insert-slice-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/insert-slice-with-patterns.mlir new file mode 100644 index 0000000..f7764be --- /dev/null +++ b/mlir/test/Dialect/Linalg/vectorization/insert-slice-with-patterns.mlir @@ -0,0 +1,90 @@ +// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s + +///---------------------------------------------------------------------------------------- +/// Tests for 
tensor.insert_slice +///---------------------------------------------------------------------------------------- + +// The pad value for xfer-read is neither needed nor available - use the default (0.0). + +// CHECK-LABEL: func @insert_static_slice_default_pad +// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x2x3xf32>, +// CHECK-SAME: %[[ARG_1:.*]]: tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> { +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x2x3xf32>, vector<1x2x3xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[ARG_1]]{{\[}}%[[C0]], %[[C0]], %[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: return %[[WRITE]] : tensor<9x8x7x1x2x3xf32> +func.func @insert_static_slice_default_pad(%arg0: tensor<1x2x3xf32>, %arg1: tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> { + %res = tensor.insert_slice %arg0 into %arg1[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 2, 3][1, 1, 1, 1, 1, 1] : tensor<1x2x3xf32> into tensor<9x8x7x1x2x3xf32> + return %res : tensor<9x8x7x1x2x3xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// Same as above, but there's a pad value available that should be used instead of the default value. 
+ +// CHECK-LABEL: func.func @insert_static_slice_non_zero_pad +// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x2x3xf32>, +// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<9x8x7x1x2x3xf32> { +// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32> +// CHECK: %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x2x3xf32>, vector<1x2x3xf32> +// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: return %[[RES]] : tensor<9x8x7x1x2x3xf32> +func.func @insert_static_slice_non_zero_pad(%arg0: tensor<1x2x3xf32>, %pad : f32) -> tensor<9x8x7x1x2x3xf32> { + %init = tensor.empty() : tensor<9x8x7x1x2x3xf32> + %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> + %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 2, 3][1, 1, 1, 1, 1, 1] : tensor<1x2x3xf32> into tensor<9x8x7x1x2x3xf32> + return %res : tensor<9x8x7x1x2x3xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// Same as above, but the source type has is dynamically shaped. This means +// that the pad value is now required and the vector dim corresponding to the +// dynamic shape has to be inferred from the shape of the destination tensor. 
+ +// CHECK-LABEL: func.func @insert_dynamic_slice_non_zero_pad( +// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x?x3xf32>, +// CHECK-SAME: %[[PAD:.*]]: f32, +// CHECK-SAME: %[[SIZE:.*]]: index) -> tensor<9x8x7x1x2x3xf32> { +// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32> +// CHECK: %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, false, true]} : tensor<1x?x3xf32>, vector<1x2x3xf32> +// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: return %[[RES]] : tensor<9x8x7x1x2x3xf32> +func.func @insert_dynamic_slice_non_zero_pad(%arg0: tensor<1x?x3xf32>, %pad : f32, %size: index) -> tensor<9x8x7x1x2x3xf32> { + %init = tensor.empty() : tensor<9x8x7x1x2x3xf32> + %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> + %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, %size, 3][1, 1, 1, 1, 1, 1] : tensor<1x?x3xf32> into tensor<9x8x7x1x2x3xf32> + return %res : tensor<9x8x7x1x2x3xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} diff --git a/mlir/test/Dialect/Linalg/vectorization/insert-slice.mlir b/mlir/test/Dialect/Linalg/vectorization/insert-slice.mlir new file mode 100644 index 0000000..ddd4f43 --- /dev/null +++ b/mlir/test/Dialect/Linalg/vectorization/insert-slice.mlir @@ -0,0 +1,150 @@ +// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s + +///---------------------------------------------------------------------------------------- +/// Tests for tensor.insert_slice +///---------------------------------------------------------------------------------------- + +func.func private @insert_slice_static_sizes(%source: tensor<?x3x?x1xi32>) -> tensor<5x3xi32> { + %c2 = arith.constant 2 : index + %init = tensor.empty() : tensor<5x3xi32> + + %source_slice = tensor.extract_slice %source[0, %c2, 0, 0] [1, 1, 5, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<5x1xi32> + %res = tensor.insert_slice %source_slice into %init[0, %c2] [5, 1] [1, 1] : tensor<5x1xi32> into tensor<5x3xi32> + + return %res : tensor<5x3xi32> +} + +// CHECK-LABEL: func.func private @insert_slice_static_sizes( +// CHECK-SAME: %[[SEC:.*]]: tensor<?x3x?x1xi32>) -> tensor<5x3xi32> { +// CHECK: %[[C_2:.*]] = arith.constant 2 : index +// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<5x3xi32> +// CHECK: %[[SRC_SLICE:.*]] = tensor.extract_slice %[[SEC]][0, %[[C_2]], 0, 0] [1, 1, 5, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<5x1xi32> +// CHECK-DAG: %[[PAD:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C_5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[C_1:.*]] = arith.constant 1 : index +// CHECK: %[[MASK:.*]] = vector.create_mask %[[C_5]], %[[C_1]] : vector<8x1xi1> +// CHECK: %[[C0:.*]] 
= arith.constant 0 : index +// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC_SLICE]][%[[C0]], %[[C0]]], %[[PAD]] : tensor<5x1xi32>, vector<8x1xi32> } : vector<8x1xi1> -> vector<8x1xi32> +// CHECK: %[[C_0:.*]] = arith.constant 0 : index +// CHECK: %[[RES:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[INIT]][%[[C_0]], %[[C_2]]] : vector<8x1xi32>, tensor<5x3xi32> } : vector<8x1xi1> -> tensor<5x3xi32> +// CHECK: return %[[RES]] : tensor<5x3xi32> + + module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [8, 1] : !transform.any_op + transform.yield + } + } + +// ----- + +// One of the _source_ dimensions is dynamic (but _destination_ dimensions are static). + +func.func private @insert_slice_dynamic_src_dim(%source: tensor<?x3x?x1xi32>, %size: index) -> tensor<5x3xi32> { + %c2 = arith.constant 2 : index + %init = tensor.empty() : tensor<5x3xi32> + + %source_slice = tensor.extract_slice %source[0, %c2, 0, 0] [1, 1, %size, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<?x1xi32> + %res = tensor.insert_slice %source_slice into %init[0, %c2] [%size, 1] [1, 1] : tensor<?x1xi32> into tensor<5x3xi32> + + return %res : tensor<5x3xi32> +} + +// CHECK-LABEL: func.func private @insert_slice_dynamic_src_dim( +// CHECK-SAME: %[[SRC:.*]]: tensor<?x3x?x1xi32>, +// CHECK-SAME: %[[SIZE:.*]]: index) -> tensor<5x3xi32> { +// CHECK: %[[C_2:.*]] = arith.constant 2 : index +// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<5x3xi32> +// CHECK: %[[SRC_SLICE:.*]] = tensor.extract_slice %[[SRC]][0, %[[C_2]], 0, 0] [1, 1, %[[SIZE]], 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<?x1xi32> +// CHECK-DAG: %[[PAD:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[C_1:.*]] = arith.constant 1 : index +// CHECK: %[[MASK:.*]] = vector.create_mask %[[SIZE]], %[[C_1]] : vector<8x1xi1> +// CHECK: %[[C_0:.*]] = arith.constant 0 : index +// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC_SLICE]][%[[C_0]], %[[C_0]]], %[[PAD]] : tensor<?x1xi32>, vector<8x1xi32> } : vector<8x1xi1> -> vector<8x1xi32> +// CHECK: %[[C_0_1:.*]] = arith.constant 0 : index +// CHECK: %[[RES:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[INIT]][%[[C_0_1]], %[[C_2]]] : vector<8x1xi32>, tensor<5x3xi32> } : vector<8x1xi1> -> tensor<5x3xi32> +// CHECK: return %[[RES]] : tensor<5x3xi32> + + module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [8, 1] : !transform.any_op + transform.yield + } + } + +// ----- + +// One of the _destination_ dimensions is dynamic (but _source_ dimensions are static). 
+ +func.func private @insert_slice_dynamic_dest_dim(%source: tensor<?x3x?x1xi32>, %size: index) -> tensor<?x3xi32> { + %c2 = arith.constant 2 : index + %init = tensor.empty(%size) : tensor<?x3xi32> + + %source_slice = tensor.extract_slice %source[0, %c2, 0, 0] [1, 1, 5, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<5x1xi32> + %res = tensor.insert_slice %source_slice into %init[0, %c2] [5, 1] [1, 1] : tensor<5x1xi32> into tensor<?x3xi32> + + return %res : tensor<?x3xi32> +} + +// CHECK-LABEL: func.func private @insert_slice_dynamic_dest_dim( +// CHECK-SAME: %[[SRC:.*]]: tensor<?x3x?x1xi32>, +// CHECK-SAME: %[[SIZE:.*]]: index) -> tensor<?x3xi32> { +// CHECK: %[[C_2:.*]] = arith.constant 2 : index +// CHECK: %[[INIT:.*]] = tensor.empty(%[[SIZE]]) : tensor<?x3xi32> +// CHECK: %[[SRC_SLICE:.*]] = tensor.extract_slice %[[SRC]][0, %[[C_2]], 0, 0] [1, 1, 5, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<5x1xi32> +// CHECK: %[[PAD:.*]] = arith.constant 0 : i32 +// CHECK: %[[C_5:.*]] = arith.constant 5 : index +// CHECK: %[[C_1:.*]] = arith.constant 1 : index +// CHECK: %[[MASK:.*]] = vector.create_mask %[[C_5]], %[[C_1]] : vector<8x1xi1> +// CHECK: %[[C_0:.*]] = arith.constant 0 : index +// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC_SLICE]][%[[C_0]], %[[C_0]]], %[[PAD]] : tensor<5x1xi32>, vector<8x1xi32> } : vector<8x1xi1> -> vector<8x1xi32> +// CHECK: %[[C_0_1:.*]] = arith.constant 0 : index +// CHECK: %[[WRITE:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[INIT]][%[[C_0_1]], %[[C_2]]] : vector<8x1xi32>, tensor<?x3xi32> } : vector<8x1xi1> -> tensor<?x3xi32> +// CHECK: return %[[WRITE]] : tensor<?x3xi32> + + module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [8, 1] : !transform.any_op + transform.yield + } + } + +// ----- + +// At least one _source_ and one _destination_ dimensions are dynamic. 
+ +func.func private @insert_slice_dynamic_source_and_dest_dim(%source: tensor<?x3x?x1xi32>, %size: index) -> tensor<?x3xi32> { + %c2 = arith.constant 2 : index + %init = tensor.empty(%size) : tensor<?x3xi32> + + %source_slice = tensor.extract_slice %source[0, %c2, 0, 0] [1, 1, %size, 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<?x1xi32> + %res = tensor.insert_slice %source_slice into %init[0, %c2] [%size, 1] [1, 1] : tensor<?x1xi32> into tensor<?x3xi32> + + return %res : tensor<?x3xi32> +} + +// CHECK-LABEL: func.func private @insert_slice_dynamic_source_and_dest_dim( +// CHECK-SAME: %[[SRC:.*]]: tensor<?x3x?x1xi32>, +// CHECK-SAME: %[[SIZE:.*]]: index) -> tensor<?x3xi32> { +// CHECK: %[[C_2:.*]] = arith.constant 2 : index +// CHECK: %[[INIT:.*]] = tensor.empty(%[[SIZE]]) : tensor<?x3xi32> +// CHECK: %[[SRC_SIZE:.*]] = tensor.extract_slice %[[SRC]][0, %[[C_2]], 0, 0] [1, 1, %[[SIZE]], 1] [1, 1, 1, 1] : tensor<?x3x?x1xi32> to tensor<?x1xi32> +// CHECK: %[[PAD:.*]] = arith.constant 0 : i32 +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[MASK:.*]] = vector.create_mask %[[SIZE]], %[[C1]] : vector<8x1xi1> +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[READ:.*]] = vector.mask %[[MASK]] { vector.transfer_read %[[SRC_SIZE]]{{\[}}%[[C0]], %[[C0]]], %[[PAD]] : tensor<?x1xi32>, vector<8x1xi32> } : vector<8x1xi1> -> vector<8x1xi32> +// CHECK: %[[C_0_1:.*]] = arith.constant 0 : index +// CHECK: %[[WRITE:.*]] = vector.mask %[[MASK]] { vector.transfer_write %[[READ]], %[[INIT]]{{\[}}%[[C_0_1]], %[[C_2]]] : vector<8x1xi32>, tensor<?x3xi32> } : vector<8x1xi1> -> tensor<?x3xi32> +// CHECK: return %[[WRITE]] : tensor<?x3xi32> + + module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [8, 1] : !transform.any_op + transform.yield + } + } diff --git a/mlir/test/Dialect/Linalg/vectorization/pad-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/pad-with-patterns.mlir new file mode 100644 index 0000000..4086d54 --- /dev/null +++ b/mlir/test/Dialect/Linalg/vectorization/pad-with-patterns.mlir @@ -0,0 +1,227 @@ +// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s + +///---------------------------------------------------------------------------------------- +/// Tests for tensor.pad +///---------------------------------------------------------------------------------------- + +// CHECK-LABEL: func @pad_static( +// CHECK-SAME: %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32 +// CHECK-NOT: tensor.pad +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[INIT:.*]] = tensor.empty() : tensor<2x3x4xf32> +// CHECK-DAG: %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x3x4xf32> +// CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]]{{.*}} : vector<2x3x4xf32>, tensor<2x3x4xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, false, true]} : tensor<2x?x2xf32>, vector<2x3x2xf32> +// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32> +// CHECK: return %[[RESULT]] +func.func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> 
tensor<2x3x4xf32> { + %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index): + tensor.yield %pad_value : f32 + } : tensor<2x?x2xf32> to tensor<2x3x4xf32> + return %0 : tensor<2x3x4xf32> +} + + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @pad_static_source( +// CHECK-SAME: %[[ARG0:.*]]: tensor<2x5x2xf32>, %[[PAD:.*]]: f32 +// CHECK-NOT: tensor.pad +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<2x6x4xf32> +// CHECK: %[[VEC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<2x6x4xf32> +// CHECK: %[[FILL:.*]] = vector.transfer_write %[[VEC]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<2x6x4xf32>, tensor<2x6x4xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true, true]} : tensor<2x5x2xf32>, vector<2x5x2xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x5x2xf32>, tensor<2x6x4xf32> +// CHECK: return %[[WRITE]] +func.func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6x4xf32> { + %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] { + ^bb0(%arg1: index, %arg2: index, %arg3: index): + tensor.yield %pad_value : f32 + } : tensor<2x5x2xf32> to tensor<2x6x4xf32> + return %0 : tensor<2x6x4xf32> +} + + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + + +// ----- + +// CHECK-LABEL: func @pad_static_dynamic( +// CHECK-SAME: %[[SRC:.*]]: tensor<1x2x2x?xf32>, %[[LOW:.*]]: index, %[[HIGH:.*]]: index +// CHECK-NOT: tensor.pad +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index +// CHECK: %[[V0:.*]] = arith.addi %[[LOW]], %[[C2]] : index +// CHECK: %[[V1:.*]] = arith.addi %[[V0]], %[[C3]] : index +// CHECK: %[[V2:.*]] = arith.addi %[[HIGH]], %[[C5]] : index +// CHECK: %[[DIM3:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32> +// CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index +// CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index +// CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32> +// CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32> +// CHECK: %[[RESULT:.*]] = tensor.insert_slice 
+// CHECK: return %[[RESULT]]
+func.func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
+                              %pad_value: f32) -> tensor<6x?x?x?xf32> {
+  %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %pad_value : f32
+  } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+  return %0 : tensor<6x?x?x?xf32>
+}
+
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_static_complex(
+// CHECK-NOT: vector<
+func.func @pad_static_complex(%arg0: tensor<2x5x2xcomplex<f32>>, %pad_value: complex<f32>) -> tensor<2x6x4xcomplex<f32>> {
+  %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index):
+      tensor.yield %pad_value : complex<f32>
+  } : tensor<2x5x2xcomplex<f32>> to tensor<2x6x4xcomplex<f32>>
+  return %0 : tensor<2x6x4xcomplex<f32>>
+}
+
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+func.func private @make_vector() -> tensor<12x13xf32>
+
+// CHECK-LABEL: func.func @pad_and_insert_slice_dest(
+// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> {
+// CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[CST:.*]] = arith.constant dense<5.000000e+00> : vector<1x12x13xf32>
+// CHECK: %[[C0_IDX:.*]] = arith.constant 0 : index
+// CHECK: %[[PAD_VAL:.*]] = arith.constant 5.000000e+00 : f32
+// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32>
+// CHECK: %[[WRITE_1:.*]] = vector.transfer_write %[[CST]], %[[EMPTY]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true, true]} : vector<1x12x13xf32>, tensor<1x12x13xf32>
+// CHECK: %[[READ_1:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]], %[[PAD_VAL]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32>
+// CHECK: %[[WRITE_2:.*]] = vector.transfer_write %[[READ_1]], %[[WRITE_1]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32>
+// CHECK: %[[MAKE_VEC:.*]] = call @make_vector() : () -> tensor<12x13xf32>
+// CHECK: %[[READ_2:.*]] = vector.transfer_read %[[MAKE_VEC]]{{\[}}%[[C0_IDX]], %[[C0_IDX]]], %[[C0]] {in_bounds = [true, true]} : tensor<12x13xf32>, vector<12x13xf32>
+// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ_2]], %[[WRITE_2]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true]} : vector<12x13xf32>, tensor<1x12x13xf32>
+// CHECK: return %[[RES]] : tensor<1x12x13xf32>
+func.func @pad_and_insert_slice_dest(
+    %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> {
+  %c5 = arith.constant 5.0 : f32
+  %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 7] {
+    ^bb0(%arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %c5 : f32
+  } : tensor<1x5x6xf32> to tensor<1x12x13xf32>
+  %1 = call @make_vector() : () -> tensor<12x13xf32>
+  %r = tensor.insert_slice %1 into %0[0, 0, 0][1, 12, 13][1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32>
+  return %r : tensor<1x12x13xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_tensor_non_const_pad_value
+// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32>
+// CHECK-NOT: tensor.pad
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index
+// CHECK: %[[FILL:.*]] = tensor.generate
+// CHECK: %[[RES:.*]] = arith.mulf
+// CHECK: tensor.yield %[[RES]] : f32
+// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %{{.*}} {in_bounds = [true, true]} : tensor<5x6xf32>, vector<5x6xf32>
+// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C3]], %[[C4]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<12x13xf32>
+// CHECK: return %[[WRITE]]
+func.func @pad_tensor_non_const_pad_value(%arg0: tensor<5x6xf32>) -> tensor<12x13xf32> {
+  %c0 = arith.constant 0 : index
+  %c5 = arith.constant 5.0 : f32
+  %0 = tensor.pad %arg0 low[3, 4] high[4, 3] {
+    ^bb0(%arg1: index, %arg2: index):
+      %i1 = arith.index_cast %arg1 : index to i32
+      %i2 = arith.index_cast %arg2 : index to i32
+      %f1 = arith.sitofp %i1 : i32 to f32
+      %f2 = arith.sitofp %i2 : i32 to f32
+      %m = arith.mulf %f1, %f2 : f32
+      tensor.yield %m : f32
+  } : tensor<5x6xf32> to tensor<12x13xf32>
+  return %0 : tensor<12x13xf32>
+}
+
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL: func @test_masked_pad_static_dynamic
+func.func @test_masked_pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
+                                          %pad_value: f32) -> tensor<6x?x?x?xf32> {
+  // CHECK: tensor.pad
+  %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %pad_value : f32
+  } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+  return %0 : tensor<6x?x?x?xf32>
+}
+
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
diff --git a/mlir/test/Dialect/Linalg/vectorization/pad.mlir b/mlir/test/Dialect/Linalg/vectorization/pad.mlir
new file mode 100644
index 0000000..6bbb7ab
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/vectorization/pad.mlir
@@ -0,0 +1,131 @@
+// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s
+
+///----------------------------------------------------------------------------------------
+/// Tests for tensor.pad
+///----------------------------------------------------------------------------------------
+
+// CHECK-LABEL: func @test_masked_vectorize_pad
+func.func @test_masked_vectorize_pad(
+  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index)
+    -> tensor<2x4xf32>
+{
+  // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
+  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[c0_0:.*]] = arith.constant 0 : index
+  // CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
+  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
+  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_0]], %[[c0_0]]], %[[c42]]
+  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
+  // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32>
+  // CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4xf32>
+  // CHECK: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_1]], %[[c0_1]]]
+  // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<2x4xf32>
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<2x4xf32>
+  return %1 : tensor<2x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
+// CHECK: func @test_masked_vectorize_dynamic_pad
+func.func @test_masked_vectorize_dynamic_pad(
+  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index)
+    -> tensor<?x?xf32>
+{
+  // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
+  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[res_d0:.+]] = affine.apply #[[MAP]]()
+  // CHECK-DAG: %[[res_d1:.+]] = affine.apply #[[MAP]]()
+  // CHECK: %[[c0_2:.*]] = arith.constant 0 : index
+  // CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
+  // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
+  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_2]], %[[c0_2]]], %[[c42]]
+  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
+  // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32>
+  // CHECK-DAG: %[[empty:.*]] = tensor.empty(%[[res_d0]], %[[res_d1]]) : tensor<?x?xf32>
+  // CHECK-DAG: %[[c0_3:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[d2:.*]] = tensor.dim %[[empty]], {{.*}} : tensor<?x?xf32>
+  // CHECK-DAG: %[[d3:.*]] = tensor.dim %[[empty]], {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[mask_2:.*]] = vector.create_mask %[[d2]], %[[d3]] : vector<2x4xi1>
+  // CHECK: %[[masked_write:.*]] = vector.mask %[[mask_2]] {
+  // CHECK-SAME: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_3]], %[[c0_3]]]
+  // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<?x?xf32>
+  // CHECK: return %[[masked_write]] : tensor<?x?xf32>
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<?x?xf32>
+  return %1 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// This case is supported because the non-zero low padding `%l0` is applied
+// to a unit result dimension; non-zero low padding on non-unit result
+// dimensions is currently unsupported.
+// CHECK-LABEL: func @test_masked_vectorize_non_zero_low_pad_unit_res_dim
+func.func @test_masked_vectorize_non_zero_low_pad_unit_res_dim(
+  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+    -> tensor<1x4xf32>
+{
+  // CHECK-DAG: %[[C42:.*]] = arith.constant 4.243000e+01 : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[C0_1:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[D0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK-DAG: %[[D1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[MASK:.*]] = vector.create_mask %[[D0]], %[[D1]] : vector<1x4xi1>
+  // CHECK: %[[MASKED_READ:.*]] = vector.mask %[[MASK]] {
+  // CHECK-SAME: vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[C42]]
+  // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32>
+  // CHECK-SAME: } : vector<1x4xi1> -> vector<1x4xf32>
+  // CHECK-DAG: %[[EMPTY:.*]] = tensor.empty() : tensor<1x4xf32>
+  // CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index
+  // CHECK: %[[MASKED_WRITE:.*]] = vector.transfer_write %[[MASKED_READ]], %[[EMPTY]][%[[C0_2]], %[[C0_2]]]
+  // CHECK-SAME: {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+  // CHECK: return %[[MASKED_WRITE]] : tensor<1x4xf32>
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<1x4xf32>
+  return %1 : tensor<1x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [1, 4] : !transform.any_op
+    transform.yield
+  }
+}
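For contrast with the final test above, a minimal sketch of the case its comment calls out as unsupported: the non-zero low pad `%l0` lands on a non-unit result dimension. This snippet is illustrative only and is not part of the patch; the function name is made up, and masked vectorization is expected to reject this op today.

// Hypothetical, not in this patch: %l0 is a non-zero low pad on result
// dimension 0, whose size (7) is non-unit, so this tensor.pad is not
// vectorizable with the current masking logic.
func.func @low_pad_non_unit_res_dim(
  %src : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
    -> tensor<7x4xf32>
{
  %cst = arith.constant 42.43 : f32
  %c0 = arith.constant 0 : index
  %0 = tensor.pad %src low[%l0, %c0] high[%h0, %h1] {
    ^bb0(%i: index, %j: index):
      tensor.yield %cst : f32
  } : tensor<?x?xf32> to tensor<7x4xf32>
  return %0 : tensor<7x4xf32>
}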
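Both new files carry the same RUN line, so each can be exercised on its own by expanding `%s` by hand, e.g.:

  mlir-opt mlir/test/Dialect/Linalg/vectorization/pad.mlir -transform-interpreter -split-input-file | FileCheck mlir/test/Dialect/Linalg/vectorization/pad.mlir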