author    Mahesh Ravishankar <ravishankarm@google.com>  2022-07-15 20:11:23 +0000
committer Mahesh Ravishankar <ravishankarm@google.com>  2022-07-21 05:05:06 +0000
commit    485190df95f98c51c3f4a4ab4db96127cdc9ce78 (patch)
tree      76d75e83b0d590dead2f12140490289a9c8d9a83 /mlir/test/Interfaces
parent    a4c62f66545d11253ef76384b78d9dc196fc8e0e (diff)
[mlir][Linalg] Deprecate `tileAndFuseLinalgOps` method and associated patterns.
`tileAndFuseLinalgOps` is a legacy approach for tiling + fusion of Linalg operations. Since it was also intended to work on operations with buffer operands, this method had fairly complex logic to make sure tile and fuse was correct even with side-effecting Linalg ops. While complex, it still wasn't robust enough. This patch deprecates this method and thereby deprecates the tiling + fusion path for ops with buffer semantics. Note that the core transformation that fuses a producer with a tiled consumer still exists; the deprecation only removes the methods that auto-magically tried to tile and fuse correctly in the presence of side effects.

`tileAndFuseLinalgOps` also works with operations with tensor semantics. The same functionality exists in at least two other ways:
1) The `tileConsumerAndFuseProducers` method. This does a similar transformation, but uses slightly different logic to automatically figure out the legal tile + fuse code. Note that this is also to be deprecated soon.
2) The preferred way uses the `TilingInterface` for tile + fuse, and relies on the caller to set the tiling options correctly to ensure that the generated code is correct.

As proof that (2) is equivalent to the functionality provided by `tileAndFuseLinalgOps`, the relevant tests have been moved to use the interface, where the test driver sets the tile sizes appropriately to generate the expected code.

Differential Revision: https://reviews.llvm.org/D129901
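To make option (2) concrete, here is a minimal C++ sketch of how a caller might drive tile + fuse through the `TilingInterface`-based helpers in the SCF dialect, choosing the tile sizes itself instead of relying on the deprecated auto-fusion logic. The helper and option names used below (`scf::SCFTileAndFuseOptions`, `scf::tileConsumerAndFuseProducersUsingSCF`) are assumptions modeled on the upstream SCF tiling utilities and may be spelled differently at this revision; the in-tree test driver for the test file below is the authoritative example.

// Hedged sketch of option (2): tile a consumer op through the TilingInterface-
// based SCF helpers and greedily fuse the producers of its tiled operands.
// The caller, not the transformation, picks tile sizes that keep the result
// correct. Helper/option names are assumptions and may differ at this revision.
#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Interfaces/TilingInterface.h"

using namespace mlir;

static LogicalResult tileAndFuseWithInterface(RewriterBase &rewriter,
                                              TilingInterface consumerOp) {
  scf::SCFTileAndFuseOptions options;
  // Caller-chosen tile sizes (10x20 here, hypothetical values in the style of
  // the tests below). Only parallel dimensions of the consumer should be tiled
  // when producers are to be fused into the resulting loop nest.
  SmallVector<OpFoldResult> tileSizes = {rewriter.getIndexAttr(10),
                                         rewriter.getIndexAttr(20)};
  options.tilingOptions.setTileSizes(tileSizes);

  // Tile the consumer and fuse the producers of the tiled operands.
  FailureOr<scf::SCFTileAndFuseResult> result =
      scf::tileConsumerAndFuseProducersUsingSCF(rewriter, consumerOp, options);
  if (failed(result))
    return failure();

  // `result` carries the generated loop nest and the tiled-and-fused ops; the
  // caller is responsible for replacing uses of the original ops with the
  // results of the loop nest.
  return success();
}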
Diffstat (limited to 'mlir/test/Interfaces')
-rw-r--r--  mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir  171
1 file changed, 171 insertions(+), 0 deletions(-)
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
index 81e2bfb..d1ca2d2 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -230,3 +230,174 @@ func.func @interchange_matmul_fusion(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?
// CHECK-SAME: outs(%[[INIT_TILE_2]] :
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
// CHECK:     scf.yield %[[INSERT]]
+
+// -----
+
+func.func @matmul_plus_matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>,
+ %arg2: tensor<?x?xf32>) -> tensor<?x?xf32>{
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0 = tensor.dim %arg2, %c0 : tensor<?x?xf32>
+ %1 = tensor.dim %arg2, %c1 : tensor<?x?xf32>
+ %2 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
+ %3 = tensor.dim %2, %c0 : tensor<?x?xf32>
+ %4 = tensor.dim %2, %c1 : tensor<?x?xf32>
+ %5 = linalg.init_tensor [%3, %4] : tensor<?x?xf32>
+ %6 = linalg.generic
+ {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel"],
+ __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
+ ins(%2, %2 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%5 : tensor<?x?xf32>) {
+ ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
+ %7 = arith.addf %arg3, %arg4 : f32
+ linalg.yield %7 : f32
+ } -> tensor<?x?xf32>
+ return %6 : tensor<?x?xf32>
+}
+// This fuses as expected, but the gemm operation is inlined twice. It should be CSE-d but isn't today.
+
+// CHECK: func @matmul_plus_matmul
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK: %[[RESULT:.+]] = scf.for %[[IV0:[a-zA-Z0-9_]+]]
+// CHECK-SAME: iter_args(%[[ARG4:.+]] = %{{[a-zA-Z0-9_]+}})
+// CHECK: %[[YIELD:.+]] = scf.for %[[IV1:[a-zA-Z0-9_]+]]
+// CHECK-SAME: iter_args(%[[ARG6:.+]] = %[[ARG4]])
+// CHECK-DAG: %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
+// CHECK-DAG: %[[ST_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
+// CHECK-DAG: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
+// CHECK: %[[LHS:.+]] = linalg.matmul
+// CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]] :
+// CHECK-SAME: outs(%[[ST_ARG2]] :
+// CHECK-DAG: %[[ST_ARG0_1:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
+// CHECK-DAG: %[[ST_ARG1_1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
+// CHECK-DAG: %[[ST_ARG2_1:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
+// CHECK: %[[RHS:.+]] = linalg.matmul
+// CHECK-SAME: ins(%[[ST_ARG0_1]], %[[ST_ARG1_1]] :
+// CHECK-SAME: outs(%[[ST_ARG2_1]] :
+// CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
+// CHECK: %[[ST_RESULT:.+]] = linalg.generic
+// CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
+// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
+// CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
+// CHECK: scf.yield %[[UPDATE]]
+// CHECK: scf.yield %[[YIELD]]
+// CHECK: return %[[RESULT]]
+
+// -----
+
+func.func @matmul_plus_transpose_matmul(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>,
+ %arg2: tensor<?x?xf32>) -> tensor<?x?xf32>{
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0 = tensor.dim %arg2, %c0 : tensor<?x?xf32>
+ %1 = tensor.dim %arg2, %c1 : tensor<?x?xf32>
+ %2 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32>
+ %3 = tensor.dim %2, %c0 : tensor<?x?xf32>
+ %4 = tensor.dim %2, %c1 : tensor<?x?xf32>
+ %5 = linalg.init_tensor [%3, %4] : tensor<?x?xf32>
+ %6 = linalg.generic
+ {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+ affine_map<(d0, d1) -> (d1, d0)>,
+ affine_map<(d0, d1) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel"],
+ __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
+ ins(%2, %2 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%5 : tensor<?x?xf32>) {
+ ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
+ %7 = arith.addf %arg3, %arg4 : f32
+ linalg.yield %7 : f32
+ } -> tensor<?x?xf32>
+ return %6 : tensor<?x?xf32>
+}
+// CHECK: func @matmul_plus_transpose_matmul
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK: %[[RESULT:.+]] = scf.for %[[IV0:[a-zA-Z0-9_]+]]
+// CHECK-SAME: iter_args(%[[ARG4:.+]] = %{{[a-zA-Z0-9_]+}})
+// CHECK: %[[YIELD:.+]] = scf.for %[[IV1:[a-zA-Z0-9_]+]]
+// CHECK-SAME: iter_args(%[[ARG6:.+]] = %[[ARG4]])
+// CHECK-DAG: %[[ST_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
+// CHECK-DAG: %[[ST_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
+// CHECK-DAG: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
+// CHECK: %[[LHS:.+]] = linalg.matmul
+// CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]]
+// CHECK-SAME: : tensor<?x?xf32>, tensor<?x?xf32>)
+// CHECK-SAME: outs(%[[ST_ARG2]] : tensor<?x?xf32>)
+// CHECK-DAG: %[[STR_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV1]], 0]
+// CHECK-DAG: %[[STR_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
+// CHECK-DAG: %[[STR_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]], %[[IV0]]]
+// CHECK: %[[RHS:.+]] = linalg.matmul
+// CHECK-SAME: ins(%[[STR_ARG0]], %[[STR_ARG1]] :
+// CHECK-SAME: outs(%[[STR_ARG2]] :
+// CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
+// CHECK: %[[ST_RESULT:.+]] = linalg.generic
+// CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
+// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
+// CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
+// CHECK: scf.yield %[[UPDATE]]
+// CHECK: scf.yield %[[YIELD]]
+// CHECK: return %[[RESULT]]
+
+// -----
+
+func.func @matmul_sequence_fusion(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>,
+ %arg2: tensor<?x?xf32>, %arg3: tensor<?x?xf32>, %arg4: tensor<?x?xf32>,
+ %arg5: tensor<?x?xf32>, %arg6: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%arg2 : tensor<?x?xf32>) -> tensor<?x?xf32> // [M, N0] * [N0, N1]
+ %1 = linalg.matmul ins(%0, %arg3 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%arg4 : tensor<?x?xf32>) -> tensor<?x?xf32> // [M, N1] * [N1, N2]
+ %2 = linalg.matmul
+ {__internal_linalg_transform__ = "gemm_sequence_fusion"}
+ ins(%1, %arg5 : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%arg6 : tensor<?x?xf32>) -> tensor<?x?xf32> // [M, N2] * [N2, N3]
+ return %2 : tensor<?x?xf32>
+}
+
+// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
+// CHECK: func @matmul_sequence_fusion(
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[N0:.+]] = tensor.dim %[[ARG0]], %[[C1]]
+// CHECK-DAG: %[[ORIG_GEMM1:.+]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] :
+// CHECK-DAG: %[[N1:.+]] = tensor.dim %[[ORIG_GEMM1]], %[[C1]]
+// CHECK-DAG: %[[ORIG_GEMM2:.+]] = linalg.matmul ins(%[[ORIG_GEMM1]], %[[ARG3]] :
+// CHECK-DAG: %[[M:.+]] = tensor.dim %[[ORIG_GEMM2]], %[[C0]]
+// CHECK-DAG: %[[N2:.+]] = tensor.dim %[[ORIG_GEMM2]], %[[C1]]
+// CHECK-DAG: %[[N3:.+]] = tensor.dim %[[ARG5]], %[[C1]]
+// CHECK: %[[R0:.+]] = scf.for %[[IV:[a-zA-Z0-9_]+]] =
+// CHECK-SAME: iter_args(%[[ARG8:.+]] = %[[ARG6]]) -> (tensor<?x?xf32>) {
+// CHECK-DAG: %[[TILE_M:.+]] = affine.min #[[MAP]](%[[IV]])[%{{.+}}, %[[M]]]
+// CHECK-DAG: %[[SLICE_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV]], 0] [%[[TILE_M]], %[[N0]]]
+// CHECK-DAG: %[[SLICE_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, 0] [%[[N0]], %[[N1]]]
+// CHECK-DAG: %[[SLICE_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV]], 0] [%[[TILE_M]], %[[N1]]]
+// CHECK-DAG: %[[TILE_GEMM1:.+]] = linalg.matmul ins(%[[SLICE_ARG0]], %[[SLICE_ARG1]] :
+// CHECK-SAME: outs(%[[SLICE_ARG2]] :
+// CHECK-DAG: %[[SLICE_ARG3:.+]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[N1]], %[[N2]]]
+// CHECK-DAG: %[[SLICE_ARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV]], 0] [%[[TILE_M]], %[[N2]]]
+// CHECK-DAG: %[[TILE_GEMM2:.+]] = linalg.matmul ins(%[[TILE_GEMM1]], %[[SLICE_ARG3]] :
+// CHECK-SAME: outs(%[[SLICE_ARG4]] :
+// CHECK-DAG: %[[SLICE_ARG5:.+]] = tensor.extract_slice %[[ARG5]][0, 0] [%[[N2]], %[[N3]]]
+// CHECK-DAG: %[[SLICE_ARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
+// CHECK-DAG: %[[TILE_GEMM3:.+]] = linalg.matmul
+// CHECK-SAME: ins(%[[TILE_GEMM2]], %[[SLICE_ARG5]] :
+// CHECK-SAME: outs(%[[SLICE_ARG6]] :
+// CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[TILE_GEMM3]] into %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
+// CHECK: scf.yield %[[UPDATE]]