author | Andrzej Warzyński <andrzej.warzynski@arm.com> | 2025-07-21 19:31:43 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-21 19:31:43 +0100 |
commit | 8940ab510ca56e0d87ab1e6a1d6cd26df3405f10 (patch) | |
tree | ff7d74464ab1b95b1632dacb82090758a513dcc8 | |
parent | ce44f089ded833acde529dbf448732a486207d5f (diff) | |
[mlir][linalg][nfc] Group tests for linalg.pack + linalg.unpack (#149783)
Groups vectorization tests for `linalg.pack` + `linalg.unpack` together.
-rw-r--r-- | mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir | 664 |
1 file changed, 337 insertions, 327 deletions
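The diff below only regroups existing tests (NFC), but for orientation: each grouped test follows the same two-part layout, a payload function carrying the `linalg.pack`/`linalg.unpack` op together with its FileCheck expectations, followed by a `transform.named_sequence` that drives `transform.structured.vectorize`. A minimal sketch of that layout, condensed from the `test_vectorize_unpack_no_masks` case in this patch (not an additional test introduced here):

```mlir
// Payload: the op under test plus its FileCheck expectations.
// CHECK-LABEL: func @test_vectorize_unpack_no_masks
func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>,
                                          %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
  // CHECK: vector.transfer_read
  // CHECK: vector.transpose
  // CHECK: vector.shape_cast
  // CHECK: vector.transfer_write
  %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16]
      into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
  return %0 : tensor<256x128xf32>
}

// Driver: a transform script that matches the op and vectorizes it.
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0
        : (!transform.any_op) -> !transform.any_op
    transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op
    transform.yield
  }
}
```

In the masked variants in this diff the requested `vector_sizes` do not match the static sizes (or the shapes are dynamic), so the vectorizer additionally materializes `vector.create_mask` / `vector.mask` around the transfers.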
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
index 4fc39e2..98e8f50 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops.mlir
@@ -933,116 +933,210 @@ module attributes {transform.with_named_sequence} {
   }
 }
+// -----
+
 ///----------------------------------------------------------------------------------------
-/// Tests for other Ops
+/// Tests for linalg.unpack
 ///----------------------------------------------------------------------------------------
 
-// -----
-
-func.func @vectorize_dynamic_fill(%A : tensor<?x?xf32>, %arg0 : f32) -> tensor<?x?xf32> {
-  %0 = linalg.fill ins(%arg0 : f32) outs(%A : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
+// CHECK-LABEL: func @test_vectorize_dynamic_shapes_unpack
+// CHECK-SAME: %[[ARG_0:.*]]: tensor<?x?xf32>,
+func.func @test_vectorize_dynamic_shapes_unpack(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?x16x2xf32>) -> tensor<?x?xf32> {
+// CHECK: %[[C0:.*]] = arith.constant 0
+// CHECK: %[[DIM:.*]] = tensor.dim %arg0, %[[C0]] : tensor<?x?xf32>
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM0:.*]] = tensor.dim %arg0, %[[C1]] : tensor<?x?xf32>
+// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00
+// CHECK: %[[C01:.*]] = arith.constant 0
+// CHECK: %[[C02:.*]] = arith.constant 0
+// CHECK: %[[DIM4:.*]] = tensor.dim %arg1, %[[C02]] : tensor<?x?x16x2xf32>
+// CHECK: %[[CNST14:.*]] = arith.constant 1
+// CHECK: %[[DIM6:.*]] = tensor.dim %arg1, %[[CNST14]] : tensor<?x?x16x2xf32>
+// CHECK: %[[CNST16:.*]] = arith.constant 16 : index
+// CHECK: %[[CNST2:.*]] = arith.constant 2 : index
+// CHECK: %[[readMsk0:.*]] = vector.create_mask %[[DIM4]], %[[DIM6]], %[[CNST16]], %[[CNST2]] : vector<2x1x16x2xi1>
+// CHECK: %[[read0:.*]] = vector.mask %[[readMsk0]] {{.*}} vector.transfer_read %{{.*}} : tensor<?x?x16x2xf32>, vector<2x1x16x2xf32> } : vector<2x1x16x2xi1> -> vector<2x1x16x2xf32>
+// CHECK: %[[trans0:.*]] = vector.transpose %[[read0]], [0, 3, 1, 2] : vector<2x1x16x2xf32> to vector<2x2x1x16xf32>
+// CHECK: %[[sc0:.*]] = vector.shape_cast %[[trans0]] : vector<2x2x1x16xf32> to vector<4x16xf32>
+// CHECK: %[[writeMsk0:.*]] = vector.create_mask {{.*}} : vector<4x16xi1>
+// CHECK: %[[write0:.*]] = vector.mask %[[writeMsk0:.*]] {{.*}} vector.transfer_write %[[sc0]], %[[ARG_0]]
+// CHECK: return %[[write0]]
+  %ret = linalg.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg0 : tensor<?x?x16x2xf32> -> tensor<?x?xf32>
+  return %ret : tensor<?x?xf32>
+}
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [4, 16] : !transform.any_op
+    transform.yield
+  }
 }
 
-// CHECK-LABEL: func.func @vectorize_dynamic_fill
-// CHECK: %[[DIM0:.*]] = tensor.dim
-// CHECK: %[[DIM1:.*]] = tensor.dim
-// CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM1]] : vector<8x16xi1>
-// CHECK: %[[BCAST:.*]] = vector.broadcast %{{.*}} : f32 to vector<8x16xf32>
-// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = [true, true]} : vector<8x16xf32>, tensor<?x?xf32> } : vector<8x16xi1>
+// -----
 
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 vector_sizes [8, 16] : !transform.any_op
+// CHECK-LABEL: func @test_vectorize_unpack
+// CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
+// CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
+func.func @test_vectorize_unpack(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
+  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+  // CHECK: %[[C0:.*]]= arith.constant 0 : index
+  // CHECK: %[[C8:.*]] = arith.constant 8 : index
+  // CHECK: %[[C80:.*]] = arith.constant 8 : index
+  // CHECK: %[[C32:.*]] = arith.constant 32 : index
+  // CHECK: %[[C16:.*]] = arith.constant 16 : index
+  // CHECK: %[[MSK0:.*]] = vector.create_mask %[[C8]], %[[C80]], %[[C32]], %[[C16]] : vector<16x8x32x16xi1>
+  // CHECK: %[[READ0:.*]] = vector.mask %[[MSK0]] { vector.transfer_read %[[SRC]]{{.*}}} : vector<16x8x32x16xi1> -> vector<16x8x32x16xf32>
+  // CHECK: %[[TRANSP0:.*]] = vector.transpose %[[READ0]], [0, 2, 1, 3] : vector<16x8x32x16xf32> to vector<16x32x8x16xf32>
+  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP0]] : vector<16x32x8x16xf32> to vector<512x128xf32>
+  // CHECK: %[[C01:.*]] = arith.constant 0 : index
+  // CHECK: %[[C256:.*]] = arith.constant 256 : index
+  // CHECK: %[[C128:.*]] = arith.constant 128 : index
+  // CHECK: %[[WRITEMSK:.*]] = vector.create_mask %[[C256]], %[[C128]] : vector<512x128xi1>
+  // CHECK: %[[WRIT:.*]] = vector.mask %[[WRITEMSK]] { vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<512x128xi1> -> tensor<256x128xf32>
+  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
+  %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+  return %0 : tensor<256x128xf32>
+ }
+ module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [512, 128] : !transform.any_op
     transform.yield
   }
 }
 
 // -----
 
-// NOTE: Often, non-trailing scalable sizes are problematic - there are no
-// "scalable" arrays of vectors at the LLVM level (multi-dim vectors are
-// decomposed into arrays of aggregates). However, the trailing dim in this
-// case is 1 and that can be folded away later.
+// CHECK-LABEL: func @test_vectorize_unpack_no_masks
+// CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
+// CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
+func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
+  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+  // CHECK: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
+  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
+  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
+  // CHECK: %[[C00:.*]] = arith.constant 0 : index
+  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<256x128xf32>, tensor<256x128xf32>
+  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
+  %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+  return %0 : tensor<256x128xf32>
+ }
+ module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op
+    transform.yield
+  }
+ }
-// NOTE: This is similar to the example above, but the trailing dim was set to
-// 1 to make it foldable + vectorizable.
+// -----
 
-func.func @vectorize_dynamic_fill_scalable(%A : tensor<?x?xf32>, %arg0 : f32) -> tensor<?x?xf32> {
-  %0 = linalg.fill ins(%arg0 : f32) outs(%A : tensor<?x?xf32>) -> tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
+// CHECK-LABEL: test_vectorize_unpack_with_outer_perm
+// CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
+// CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
+ func.func @test_vectorize_unpack_with_outer_perm(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
+  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+  // CHECK: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
+  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
+  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
+  // CHECK: %[[C00:.*]] = arith.constant 0 : index
+  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<256x128xf32>, tensor<256x128xf32>
+  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
+  %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+  return %0 : tensor<256x128xf32>
+ }
+ module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op
+    transform.yield
+  }
 }
 
-// CHECK-LABEL: func.func @vectorize_dynamic_fill_scalable
-// CHECK: %[[DIM0:.*]] = tensor.dim
-// CHECK: %[[DIM1:.*]] = tensor.dim
-// CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM1]] : vector<[8]x1xi1>
-// CHECK: %[[BCAST:.*]] = vector.broadcast %{{.*}} : f32 to vector<[8]x1xf32>
-// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = [true, true]} : vector<[8]x1xf32>, tensor<?x?xf32> } : vector<[8]x1xi1>
+// -----
 
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 vector_sizes [[8], 1] : !transform.any_op
+// CHECK-LABEL: test_vectorize_unpack_no_vector_sizes
+// CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
+// CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
+func.func @test_vectorize_unpack_no_vector_sizes(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
+  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+  // CHECK: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
+  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
+  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
+  // CHECK: %[[C00:.*]] = arith.constant 0 : index
+  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<256x128xf32>, tensor<256x128xf32>
+  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
+  %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+  return %0 : tensor<256x128xf32>
+ }
+ module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 : !transform.any_op
     transform.yield
   }
-}
+ }
 
 // -----
 
-// CHECK: #[[MAP:.*]] = affine_map<(d0, d1) -> (d1, d0)>
-// CHECK: func @test_masked_vectorize_linalg_transpose
-func.func @test_masked_vectorize_linalg_transpose(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
-  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK-DAG: %[[D0:.*]] = tensor.dim %arg0, %[[C0]]
-  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-  // CHECK-DAG: %[[D1:.*]] = tensor.dim %arg0, %[[C1]]
-  // CHECK: %[[MASK0:.*]] = vector.create_mask %[[D0]], %[[D1]]
-  // CHECK: %[[LOAD:.*]] = vector.mask %[[MASK0]] { vector.transfer_read %arg0{{.+}} permutation_map = #[[MAP]]{{.+}} }
-  // CHECK-SAME: vector<4x2xi1> -> vector<2x4xf32>
-  // CHECK: %[[MASK1:.*]] = vector.create_mask %[[D1]], %[[D0]]
-  // CHECK: %[[WRITE:.*]] = vector.mask %[[MASK1]] { vector.transfer_write %[[LOAD]], %arg1{{.+}} }
-  // CHECK-SAME: vector<2x4xi1> -> tensor<?x?xf32>
-  // CHECK: return %[[WRITE]]
-  %0 = linalg.transpose ins(%arg0 : tensor<?x?xf32>) outs(%arg1 : tensor<?x?xf32>) permutation = [1, 0]
-  return %0 : tensor<?x?xf32>
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.transpose"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+// CHECK-LABEL: test_vectorize_unpack_no_vector_sizes_slice_output
+// CHECK-SAME: %[[SRC:.*]]: tensor<8x4x16x16xf32>
+// CHECK-SAME: %[[DEST:.*]]: tensor<64x127xf32>
+func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x4x16x16xf32>, %dest: tensor<64x127xf32>) -> tensor<64x127xf32> {
+  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+  // CHECK: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x4x16x16xf32>, vector<8x4x16x16xf32>
+  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x4x16x16xf32> to vector<4x16x8x16xf32>
+  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<4x16x8x16xf32> to vector<64x128xf32>
+  // CHECK: %[[C00:.*]] = arith.constant 0 : index
+  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]
+  // CHECK-SAME: {in_bounds = [true, false]} : vector<64x128xf32>, tensor<64x127xf32>
+  // CHECK: return %[[WRIT]] : tensor<64x127xf32>
+  %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32>
+  return %0 : tensor<64x127xf32>
+ }
+ module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 : !transform.any_op
     transform.yield
   }
-}
+ }
 
 // -----
 
-// CHECK-LABEL: func @test_masked_vectorize_linalg_copy
-func.func @test_masked_vectorize_linalg_copy(%A : memref<?x?xf32>, %B : memref<?x?xf32>) {
-  // CHECK: %[[c0:.*]] = arith.constant 0 : index
-  // CHECK: %[[d0:.*]] = memref.dim %{{.*}}, %[[c0]] : memref<?x?xf32>
-  // CHECK: %[[c1:.*]] = arith.constant 1 : index
-  // CHECK: %[[d1:.*]] = memref.dim %{{.*}}, %[[c1]] : memref<?x?xf32>
-  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
-  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<?x?xf32>, vector<2x4xf32> } : vector<2x4xi1> -> vector<2x4xf32>
-  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<2x4xf32>, memref<?x?xf32> } : vector<2x4xi1>
-  linalg.copy ins(%A : memref<?x?xf32>) outs(%B : memref<?x?xf32>)
-  return
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+// CHECK-LABEL: test_vectorize_unpack_no_vector_sizes_permute
+// CHECK-SAME: %[[SRC:.*]]: tensor<4x7x4xf32>
+// CHECK-SAME: %[[DEST:.*]]: tensor<7x16xf32>
+func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf32>, %dest: tensor<7x16xf32>) -> tensor<7x16xf32> {
+  %0 = linalg.unpack %source outer_dims_perm=[1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32>
+  return %0 : tensor<7x16xf32>
+ }
+ // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<4x7x4xf32>, vector<4x7x4xf32>
+ // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 0, 2] : vector<4x7x4xf32> to vector<7x4x4xf32>
+ // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<7x4x4xf32> to vector<7x16xf32>
+ // CHECK: %[[C00:.*]] = arith.constant 0 : index
+ // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<7x16xf32>, tensor<7x16xf32>
+ // CHECK: return %[[WRIT]] : tensor<7x16xf32>
+ module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 : !transform.any_op
     transform.yield
   }
-}
-
+ }
 // -----
 
+///----------------------------------------------------------------------------------------
+/// Tests for linalg.pack
+///----------------------------------------------------------------------------------------
+
 // Input identical as the test in vectorization-with-patterns.mlir. Output is
 // different - vector sizes are inferred (rather than user-specified) and hence
 // masking was used.
@@ -1153,6 +1247,171 @@ module attributes {transform.with_named_sequence} {
   }
 }
 
 // -----
 
+// CHECK-LABEL: test_vectorize_pack_no_vector_sizes
+func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> {
+  %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32>
+  return %pack : tensor<2x4x16x2xf32>
+}
+// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+// CHECK: %[[read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]]], %[[cst]]
+// CHECK-SAME: {in_bounds = [true, true]} : tensor<64x4xf32>, vector<64x4xf32>
+// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<64x4xf32> to vector<4x16x2x2xf32>
+// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [2, 0, 1, 3] : vector<4x16x2x2xf32> to vector<2x4x16x2xf32>
+// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4x16x2xf32>
+// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
+// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<2x4x16x2xf32>, tensor<2x4x16x2xf32>
+// CHECK: return %[[write]] : tensor<2x4x16x2xf32>
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL: test_vectorize_padded_pack_no_vector_sizes
+func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
+  %pad = arith.constant 0.000000e+00 : f32
+  %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
+  return %pack : tensor<32x4x1x16x2xf32>
+}
+// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+// CHECK: %[[transfer_read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
+// CHECK-SAME: {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
+// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[transfer_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
+// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
+// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
+// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
+// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
+// CHECK: return %[[write]] : tensor<32x4x1x16x2xf32>
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 : !transform.any_op
+    transform.yield
+  }
+}
+
+
+///----------------------------------------------------------------------------------------
+/// Tests for other Ops
+///----------------------------------------------------------------------------------------
+
+// -----
+
+func.func @vectorize_dynamic_fill(%A : tensor<?x?xf32>, %arg0 : f32) -> tensor<?x?xf32> {
+  %0 = linalg.fill ins(%arg0 : f32) outs(%A : tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}
+
+// CHECK-LABEL: func.func @vectorize_dynamic_fill
+// CHECK: %[[DIM0:.*]] = tensor.dim
+// CHECK: %[[DIM1:.*]] = tensor.dim
+// CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM1]] : vector<8x16xi1>
+// CHECK: %[[BCAST:.*]] = vector.broadcast %{{.*}} : f32 to vector<8x16xf32>
+// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = [true, true]} : vector<8x16xf32>, tensor<?x?xf32> } : vector<8x16xi1>
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [8, 16] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// NOTE: Often, non-trailing scalable sizes are problematic - there are no
+// "scalable" arrays of vectors at the LLVM level (multi-dim vectors are
+// decomposed into arrays of aggregates). However, the trailing dim in this
+// case is 1 and that can be folded away later.
+
+// NOTE: This is similar to the example above, but the trailing dim was set to
+// 1 to make it foldable + vectorizable.
+
+func.func @vectorize_dynamic_fill_scalable(%A : tensor<?x?xf32>, %arg0 : f32) -> tensor<?x?xf32> {
+  %0 = linalg.fill ins(%arg0 : f32) outs(%A : tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}
+
+// CHECK-LABEL: func.func @vectorize_dynamic_fill_scalable
+// CHECK: %[[DIM0:.*]] = tensor.dim
+// CHECK: %[[DIM1:.*]] = tensor.dim
+// CHECK: %[[MASK:.*]] = vector.create_mask %[[DIM0]], %[[DIM1]] : vector<[8]x1xi1>
+// CHECK: %[[BCAST:.*]] = vector.broadcast %{{.*}} : f32 to vector<[8]x1xf32>
+// CHECK: vector.mask %[[MASK]] { vector.transfer_write %[[BCAST]], {{.*}} {in_bounds = [true, true]} : vector<[8]x1xf32>, tensor<?x?xf32> } : vector<[8]x1xi1>
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [[8], 1] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK: #[[MAP:.*]] = affine_map<(d0, d1) -> (d1, d0)>
+// CHECK: func @test_masked_vectorize_linalg_transpose
+func.func @test_masked_vectorize_linalg_transpose(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[D0:.*]] = tensor.dim %arg0, %[[C0]]
+  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+  // CHECK-DAG: %[[D1:.*]] = tensor.dim %arg0, %[[C1]]
+  // CHECK: %[[MASK0:.*]] = vector.create_mask %[[D0]], %[[D1]]
+  // CHECK: %[[LOAD:.*]] = vector.mask %[[MASK0]] { vector.transfer_read %arg0{{.+}} permutation_map = #[[MAP]]{{.+}} }
+  // CHECK-SAME: vector<4x2xi1> -> vector<2x4xf32>
+  // CHECK: %[[MASK1:.*]] = vector.create_mask %[[D1]], %[[D0]]
+  // CHECK: %[[WRITE:.*]] = vector.mask %[[MASK1]] { vector.transfer_write %[[LOAD]], %arg1{{.+}} }
+  // CHECK-SAME: vector<2x4xi1> -> tensor<?x?xf32>
+  // CHECK: return %[[WRITE]]
+  %0 = linalg.transpose ins(%arg0 : tensor<?x?xf32>) outs(%arg1 : tensor<?x?xf32>) permutation = [1, 0]
+  return %0 : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.transpose"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// CHECK-LABEL: func @test_masked_vectorize_linalg_copy
+func.func @test_masked_vectorize_linalg_copy(%A : memref<?x?xf32>, %B : memref<?x?xf32>) {
+  // CHECK: %[[c0:.*]] = arith.constant 0 : index
+  // CHECK: %[[d0:.*]] = memref.dim %{{.*}}, %[[c0]] : memref<?x?xf32>
+  // CHECK: %[[c1:.*]] = arith.constant 1 : index
+  // CHECK: %[[d1:.*]] = memref.dim %{{.*}}, %[[c1]] : memref<?x?xf32>
+  // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
+  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_read %{{.*}} {in_bounds = [true, true]} : memref<?x?xf32>, vector<2x4xf32> } : vector<2x4xi1> -> vector<2x4xf32>
+  // CHECK: vector.mask %[[mask]] {{.*}} vector.transfer_write %{{.*}} {in_bounds = [true, true]} : vector<2x4xf32>, memref<?x?xf32> } : vector<2x4xi1>
+  linalg.copy ins(%A : memref<?x?xf32>) outs(%B : memref<?x?xf32>)
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["linalg.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+
+// -----
+
 func.func @matmul(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
   linalg.matmul ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
               outs(%C: memref<?x?xf32>)
@@ -1223,252 +1482,3 @@ module attributes {transform.with_named_sequence} {
     transform.yield
   }
 }
-
-// -----
-
-// CHECK-LABEL: func @test_vectorize_dynamic_shapes_unpack
-// CHECK-SAME: %[[ARG_0:.*]]: tensor<?x?xf32>,
-func.func @test_vectorize_dynamic_shapes_unpack(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?x16x2xf32>) -> tensor<?x?xf32> {
-// CHECK: %[[C0:.*]] = arith.constant 0
-// CHECK: %[[DIM:.*]] = tensor.dim %arg0, %[[C0]] : tensor<?x?xf32>
-// CHECK: %[[C1:.*]] = arith.constant 1 : index
-// CHECK: %[[DIM0:.*]] = tensor.dim %arg0, %[[C1]] : tensor<?x?xf32>
-// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00
-// CHECK: %[[C01:.*]] = arith.constant 0
-// CHECK: %[[C02:.*]] = arith.constant 0
-// CHECK: %[[DIM4:.*]] = tensor.dim %arg1, %[[C02]] : tensor<?x?x16x2xf32>
-// CHECK: %[[CNST14:.*]] = arith.constant 1
-// CHECK: %[[DIM6:.*]] = tensor.dim %arg1, %[[CNST14]] : tensor<?x?x16x2xf32>
-// CHECK: %[[CNST16:.*]] = arith.constant 16 : index
-// CHECK: %[[CNST2:.*]] = arith.constant 2 : index
-// CHECK: %[[readMsk0:.*]] = vector.create_mask %[[DIM4]], %[[DIM6]], %[[CNST16]], %[[CNST2]] : vector<2x1x16x2xi1>
-// CHECK: %[[read0:.*]] = vector.mask %[[readMsk0]] {{.*}} vector.transfer_read %{{.*}} : tensor<?x?x16x2xf32>, vector<2x1x16x2xf32> } : vector<2x1x16x2xi1> -> vector<2x1x16x2xf32>
-// CHECK: %[[trans0:.*]] = vector.transpose %[[read0]], [0, 3, 1, 2] : vector<2x1x16x2xf32> to vector<2x2x1x16xf32>
-// CHECK: %[[sc0:.*]] = vector.shape_cast %[[trans0]] : vector<2x2x1x16xf32> to vector<4x16xf32>
-// CHECK: %[[writeMsk0:.*]] = vector.create_mask {{.*}} : vector<4x16xi1>
-// CHECK: %[[write0:.*]] = vector.mask %[[writeMsk0:.*]] {{.*}} vector.transfer_write %[[sc0]], %[[ARG_0]]
-// CHECK: return %[[write0]]
-  %ret = linalg.unpack %arg1 inner_dims_pos = [1, 0] inner_tiles = [16, 2] into %arg0 : tensor<?x?x16x2xf32> -> tensor<?x?xf32>
-  return %ret : tensor<?x?xf32>
-}
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 vector_sizes [4, 16] : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: func @test_vectorize_unpack
-// CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
-// CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
-func.func @test_vectorize_unpack(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]]= arith.constant 0 : index
-  // CHECK: %[[C8:.*]] = arith.constant 8 : index
-  // CHECK: %[[C80:.*]] = arith.constant 8 : index
-  // CHECK: %[[C32:.*]] = arith.constant 32 : index
-  // CHECK: %[[C16:.*]] = arith.constant 16 : index
-  // CHECK: %[[MSK0:.*]] = vector.create_mask %[[C8]], %[[C80]], %[[C32]], %[[C16]] : vector<16x8x32x16xi1>
-  // CHECK: %[[READ0:.*]] = vector.mask %[[MSK0]] { vector.transfer_read %[[SRC]]{{.*}}} : vector<16x8x32x16xi1> -> vector<16x8x32x16xf32>
-  // CHECK: %[[TRANSP0:.*]] = vector.transpose %[[READ0]], [0, 2, 1, 3] : vector<16x8x32x16xf32> to vector<16x32x8x16xf32>
-  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP0]] : vector<16x32x8x16xf32> to vector<512x128xf32>
-  // CHECK: %[[C01:.*]] = arith.constant 0 : index
-  // CHECK: %[[C256:.*]] = arith.constant 256 : index
-  // CHECK: %[[C128:.*]] = arith.constant 128 : index
-  // CHECK: %[[WRITEMSK:.*]] = vector.create_mask %[[C256]], %[[C128]] : vector<512x128xi1>
-  // CHECK: %[[WRIT:.*]] = vector.mask %[[WRITEMSK]] { vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<512x128xi1> -> tensor<256x128xf32>
-  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
-  %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
-  return %0 : tensor<256x128xf32>
-  }
-  module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 vector_sizes [512, 128] : !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-// CHECK-LABEL: func @test_vectorize_unpack_no_masks
-// CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
-// CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
-func.func @test_vectorize_unpack_no_masks(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
-  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
-  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
-  // CHECK: %[[C00:.*]] = arith.constant 0 : index
-  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<256x128xf32>, tensor<256x128xf32>
-  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
-  %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
-  return %0 : tensor<256x128xf32>
-  }
-  module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op
-    transform.yield
-  }
-  }
-
- // -----
-
-// CHECK-LABEL: test_vectorize_unpack_with_outer_perm
-// CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
-// CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
- func.func @test_vectorize_unpack_with_outer_perm(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
-  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
-  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
-  // CHECK: %[[C00:.*]] = arith.constant 0 : index
-  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<256x128xf32>, tensor<256x128xf32>
-  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
-  %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
-  return %0 : tensor<256x128xf32>
-  }
-  module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 vector_sizes [256, 128] : !transform.any_op
-    transform.yield
-  }
-}
-
- // -----
-
-// CHECK-LABEL: test_vectorize_pack_no_vector_sizes
-func.func @test_vectorize_pack_no_vector_sizes(%arg0: tensor<64x4xf32>, %arg1: tensor<2x4x16x2xf32>) -> tensor<2x4x16x2xf32> {
-  %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 2] into %arg1 : tensor<64x4xf32> -> tensor<2x4x16x2xf32>
-  return %pack : tensor<2x4x16x2xf32>
-}
-// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
-// CHECK: %[[read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]]], %[[cst]]
-// CHECK-SAME: {in_bounds = [true, true]} : tensor<64x4xf32>, vector<64x4xf32>
-// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[read]] : vector<64x4xf32> to vector<4x16x2x2xf32>
-// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [2, 0, 1, 3] : vector<4x16x2x2xf32> to vector<2x4x16x2xf32>
-// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4x16x2xf32>
-// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
-// CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<2x4x16x2xf32>, tensor<2x4x16x2xf32>
-// CHECK: return %[[write]] : tensor<2x4x16x2xf32>
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 : !transform.any_op
-    transform.yield
-  }
-}
-
- // -----
-
-// CHECK-LABEL: test_vectorize_padded_pack_no_vector_sizes
-func.func @test_vectorize_padded_pack_no_vector_sizes(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> {
-  %pad = arith.constant 0.000000e+00 : f32
-  %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32>
-  return %pack : tensor<32x4x1x16x2xf32>
-}
-// CHECK-DAG: %[[cst:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
-// CHECK: %[[transfer_read:.*]] = vector.transfer_read %{{.*}}[%[[c0]], %[[c0]], %[[c0]]], %[[cst]]
-// CHECK-SAME: {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32>
-// CHECK: %[[shape_cast:.*]] = vector.shape_cast %[[transfer_read]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32>
-// CHECK: %[[transpose:.*]] = vector.transpose %[[shape_cast]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32>
-// CHECK-DAG: %[[c0_1:.*]] = arith.constant 0 : index
-// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32>
-// CHECK: %[[write:.*]] = vector.transfer_write %[[transpose]], %[[empty]][%[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]], %[[c0_1]]]
-// CHECK-SAME: {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32>
-// CHECK: return %[[write]] : tensor<32x4x1x16x2xf32>
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 : !transform.any_op
-    transform.yield
-  }
-}
-
- // -----
-
-// CHECK-LABEL: test_vectorize_unpack_no_vector_sizes
-// CHECK-SAME: %[[SRC:.*]]: tensor<8x8x32x16xf32>
-// CHECK-SAME: %[[DEST:.*]]: tensor<256x128xf32>
-func.func @test_vectorize_unpack_no_vector_sizes(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
-  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
-  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
-  // CHECK: %[[C00:.*]] = arith.constant 0 : index
-  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<256x128xf32>, tensor<256x128xf32>
-  // CHECK: return %[[WRIT]] : tensor<256x128xf32>
-  %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
-  return %0 : tensor<256x128xf32>
-  }
-  module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 : !transform.any_op
-    transform.yield
-  }
-  }
-
- // -----
-
-// CHECK-LABEL: test_vectorize_unpack_no_vector_sizes_slice_output
-// CHECK-SAME: %[[SRC:.*]]: tensor<8x4x16x16xf32>
-// CHECK-SAME: %[[DEST:.*]]: tensor<64x127xf32>
-func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x4x16x16xf32>, %dest: tensor<64x127xf32>) -> tensor<64x127xf32> {
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<8x4x16x16xf32>, vector<8x4x16x16xf32>
-  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x4x16x16xf32> to vector<4x16x8x16xf32>
-  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<4x16x8x16xf32> to vector<64x128xf32>
-  // CHECK: %[[C00:.*]] = arith.constant 0 : index
-  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]
-  // CHECK-SAME: {in_bounds = [true, false]} : vector<64x128xf32>, tensor<64x127xf32>
-  // CHECK: return %[[WRIT]] : tensor<64x127xf32>
-  %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32>
-  return %0 : tensor<64x127xf32>
-  }
-  module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 : !transform.any_op
-    transform.yield
-  }
-  }
-
-// -----
-
-// CHECK-LABEL: test_vectorize_unpack_no_vector_sizes_permute
-// CHECK-SAME: %[[SRC:.*]]: tensor<4x7x4xf32>
-// CHECK-SAME: %[[DEST:.*]]: tensor<7x16xf32>
-func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf32>, %dest: tensor<7x16xf32>) -> tensor<7x16xf32> {
-  %0 = linalg.unpack %source outer_dims_perm=[1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32>
-  return %0 : tensor<7x16xf32>
-  }
-  // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{.*}}} : tensor<4x7x4xf32>, vector<4x7x4xf32>
-  // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 0, 2] : vector<4x7x4xf32> to vector<7x4x4xf32>
-  // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<7x4x4xf32> to vector<7x16xf32>
-  // CHECK: %[[C00:.*]] = arith.constant 0 : index
-  // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[DEST]]{{.*}}} : vector<7x16xf32>, tensor<7x16xf32>
-  // CHECK: return %[[WRIT]] : tensor<7x16xf32>
-  module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["linalg.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    transform.structured.vectorize %0 : !transform.any_op
-    transform.yield
-  }
-  }