//===- TestTilingInterface.cpp - Test tiling using `TilingInterface` -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a pass for testing tiling operations using // `TilingInterface`. // //===----------------------------------------------------------------------===// #include #include #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h" #include "mlir/Interfaces/TilingInterface.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; // TODO: this file should disappear and instead tests should make use of the // transform dialect. namespace { /// Marker used as attribute name in generated Linalg rewriting transformations. const StringLiteral kTransformMarker = "__internal_transform__"; /// Helper class to control application of linalg transformation patterns. /// Control comes in 2 forms: /// 1. attribute matching and setting behavior using the attribute named /// `kTransformMarker`. This can be used to build a state machine /// using attributes and incrementally applying patterns to advance states. /// 2. filter function, which is a simple lambda on the Operation* that /// returns a LogicalResult. struct TransformationFilter { using FilterFunction = std::function; explicit TransformationFilter( ArrayRef matchDisjunction = {}, std::optional replacement = std::nullopt); explicit TransformationFilter( const FilterFunction &f, ArrayRef matchDisjunction = {}, std::optional replacement = std::nullopt); TransformationFilter(TransformationFilter &&) = default; TransformationFilter(const TransformationFilter &) = default; LogicalResult checkAndNotify(PatternRewriter &rewriter, Operation *op) const; void replaceTransformationFilter(PatternRewriter &rewriter, Operation *op) const; TransformationFilter &addFilter(const FilterFunction &f) { if (f) filters.push_back(f); return *this; } template TransformationFilter &addOpFilter() { return addFilter( [](Operation *op) { return success(isa(op)); }); } TransformationFilter &addOpNameFilter(StringRef opName) { return addFilter([opName](Operation *op) { return success(op->getName().getStringRef() == opName); }); } TransformationFilter &setMatchByDefault() { matchByDefault = true; return *this; } private: SmallVector filters; SmallVector matchDisjunction; std::optional replacement; /// When set to true, if the attribute is not set, it will be treated as /// a match. Default is false. bool matchByDefault; }; TransformationFilter::TransformationFilter( ArrayRef matchDisjunction, std::optional replacement) : matchDisjunction(matchDisjunction.begin(), matchDisjunction.end()), replacement(replacement), matchByDefault(false) {} LogicalResult TransformationFilter::checkAndNotify(PatternRewriter &rewriter, Operation *op) const { if (llvm::any_of(filters, [&](const FilterFunction &f) { return failed(f(op)); })) return failure(); auto attr = op->template getAttrOfType(kTransformMarker); if (!attr) { // 1. Has no filter case and matchDisjunction is empty. if (matchDisjunction.empty() || matchByDefault) return success(); // 2. Has no filter but was expecting a filter. return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { diag << " does not have any filter from list: "; interleaveComma(matchDisjunction, diag); }); } // 4. Match explicit filter. for (auto filter : matchDisjunction) if (attr.getValue() == filter) return success(); // 5. Fail to match. return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { diag << " does not have any filter from list: "; interleaveComma(matchDisjunction, diag); }); } void TransformationFilter::replaceTransformationFilter( PatternRewriter &rewriter, Operation *op) const { if (replacement.has_value()) op->setAttr(kTransformMarker, *replacement); else op->removeAttr(rewriter.getStringAttr(kTransformMarker)); } /// Pattern for testing `TileUsingSCFForOp` pattern (that tiles operations using /// the `TilingInterface` with `scf.for` ops for iterating over the tiles) while /// using a `filter` to avoid recursive application. struct TestTileUsingSCFForOp : public OpInterfaceRewritePattern { TestTileUsingSCFForOp(MLIRContext *context, scf::SCFTilingOptions options, TransformationFilter filter = TransformationFilter(), PatternBenefit benefit = 1) : OpInterfaceRewritePattern(context, benefit), options(std::move(options)), filter(std::move(filter)) {} /// Construct a generic pattern applied to `opName`. TestTileUsingSCFForOp(StringRef opName, MLIRContext *context, scf::SCFTilingOptions options, TransformationFilter filter = TransformationFilter(), PatternBenefit benefit = 1) : OpInterfaceRewritePattern(context, benefit), options(std::move(options)), filter(std::move(filter)) {} LogicalResult matchAndRewrite(TilingInterface op, PatternRewriter &rewriter) const override { if (failed(filter.checkAndNotify(rewriter, op))) return failure(); FailureOr tilingResult = scf::tileUsingSCFForOp(rewriter, op, options); if (failed(tilingResult)) return rewriter.notifyMatchFailure(op, "failed to tile operation"); if (op->getNumResults()) { rewriter.replaceOp(op, tilingResult->replacements); } else { rewriter.eraseOp(op); } for (auto *tiledOp : tilingResult->tiledOps) filter.replaceTransformationFilter(rewriter, tiledOp); return success(); } private: scf::SCFTilingOptions options; TransformationFilter filter; }; /// Pattern for testing `tileUsingSCFForallOp` (that tiles operations using /// the `TilingInterface` with `scf.forall` ops for iterating over the tiles) /// while using a `filter` to avoid recursive application. struct TestTileUsingSCFForallOp : public OpInterfaceRewritePattern { TestTileUsingSCFForallOp(MLIRContext *context, scf::SCFTilingOptions options, TransformationFilter filter = TransformationFilter(), PatternBenefit benefit = 1) : OpInterfaceRewritePattern(context, benefit), options(std::move(options)), filter(std::move(filter)) {} /// Construct a generic pattern applied to `opName`. TestTileUsingSCFForallOp(StringRef opName, MLIRContext *context, scf::SCFTilingOptions options, TransformationFilter filter = TransformationFilter(), PatternBenefit benefit = 1) : OpInterfaceRewritePattern(context, benefit), options(std::move(options)), filter(std::move(filter)) {} LogicalResult matchAndRewrite(TilingInterface op, PatternRewriter &rewriter) const override { if (failed(filter.checkAndNotify(rewriter, op))) return failure(); FailureOr tilingResult = scf::tileUsingSCFForallOp(rewriter, op, options); if (failed(tilingResult)) return rewriter.notifyMatchFailure(op, "failed to tile operation"); if (op->getNumResults()) { rewriter.replaceOp(op, tilingResult->replacements); } else { rewriter.eraseOp(op); } for (auto *tiledOp : tilingResult->tiledOps) filter.replaceTransformationFilter(rewriter, tiledOp); return success(); } private: scf::SCFTilingOptions options; TransformationFilter filter; }; /// Pattern for testing `TileConsumerAndFuseProducersUsingSCFForOp` pattern /// (that tiles and fuses operations using the `TilingInterface` with `scf.for` /// ops for iterating over the tiles) while using a `filter` to avoid recursive /// application. struct TestTileConsumerAndFuseProducersGreedilyUsingSCFForOp : public OpInterfaceRewritePattern { TestTileConsumerAndFuseProducersGreedilyUsingSCFForOp( MLIRContext *context, scf::SCFTileAndFuseOptions options, TransformationFilter filter = TransformationFilter(), PatternBenefit benefit = 1) : OpInterfaceRewritePattern(context, benefit), options(std::move(options)), filter(std::move(filter)) {} /// Construct a generic pattern applied to `opName`. TestTileConsumerAndFuseProducersGreedilyUsingSCFForOp( StringRef opName, MLIRContext *context, scf::SCFTileAndFuseOptions options, TransformationFilter filter = TransformationFilter(), PatternBenefit benefit = 1) : OpInterfaceRewritePattern(context, benefit), options(std::move(options)), filter(std::move(filter)) {} LogicalResult matchAndRewrite(TilingInterface op, PatternRewriter &rewriter) const override { if (failed(filter.checkAndNotify(rewriter, op))) return failure(); FailureOr tileAndFuseResult = scf::tileConsumerAndFuseProducerGreedilyUsingSCFForOp(rewriter, op, options); if (failed(tileAndFuseResult)) { return failure(); } // Replace the tiled op with replacements. SmallVector replacements(op->getNumResults()); for (const auto &result : llvm::enumerate(op->getResults())) { replacements[result.index()] = tileAndFuseResult->replacements.lookup(result.value()); } rewriter.replaceOp(op, replacements); filter.replaceTransformationFilter( rewriter, tileAndFuseResult->tiledAndFusedOps.front()); return success(); } private: scf::SCFTileAndFuseOptions options; TransformationFilter filter; }; /// Pattern to tile a consumer and fuse producer with it /// while reconstructing the value of the fused producer /// from within the loop nest to replace any external /// uses of the producer. In general yielding the producer /// this way requires a guarantee that the slice of the producer /// is not computed redundantly within the tiled loops. An analysis that /// figures it out has shown to be very complex. So this is left as a caller /// side determination. In this test pattern it is assumed that the tile sizes /// are selected such that all producers when fused into the tiled loops do no /// have redundant computation. struct TestTileConsumerFuseAndYieldProducerUsingSCFForOp : public OpInterfaceRewritePattern { TestTileConsumerFuseAndYieldProducerUsingSCFForOp( MLIRContext *context, scf::SCFTilingOptions options, TransformationFilter filter = TransformationFilter(), PatternBenefit benefit = 1) : OpInterfaceRewritePattern(context, benefit), options(std::move(options)), filter(std::move(filter)) {} LogicalResult matchAndRewrite(TilingInterface rootOp, PatternRewriter &rewriter) const override { if (failed(filter.checkAndNotify(rewriter, rootOp))) return failure(); // Collect list of operations that can be tiled and fused. llvm::SmallDenseSet tiledAndFusedOps = collectTiledAndFusedOps(rootOp); auto isIgnoredUser = [&](Operation *user, scf::ForOp outerMostTiledLoop) { return tiledAndFusedOps.count(user) || isa(user) || outerMostTiledLoop->isAncestor(user); }; // The rest of this method is similar to // scf::tileConsumerAndFuseProducerGreedilyUsingSCFForOp, except that also // yields replacements for values of the fused producer. // 1. Tile the consumer. SmallVector yieldedValuesToOrigValues; FailureOr tilingResult = scf::tileUsingSCFForOp(rewriter, rootOp, options); if (failed(tilingResult)) { return rewriter.notifyMatchFailure(rootOp, "failed to tile base operation"); } yieldedValuesToOrigValues.append(rootOp->result_begin(), rootOp->result_end()); // 2. Tiling each operation results in generation of slices. The source of // these slices could be producers that can be fused into the tiled loops by // computing the slices of these producers in-place. This results in more // slices created for operands of the "fused producer". This open up more // opportunities for fusion. Use a worklist to fuse greedily. auto addCandidateSlices = [](Operation *fusedOp, std::deque &candidates) { for (Value operand : fusedOp->getOperands()) if (auto sliceOp = operand.getDefiningOp()) candidates.push_back(sliceOp); }; std::deque candidates; addCandidateSlices(tilingResult->tiledOps.back(), candidates); OpBuilder::InsertionGuard g(rewriter); auto forLoops = llvm::to_vector(llvm::map_range( tilingResult->loops, [](auto op) { return cast(op); })); while (!candidates.empty()) { // Traverse the slices in BFS fashion. tensor::ExtractSliceOp candidateSliceOp = candidates.front(); candidates.pop_front(); // Materialize the slice of the producer in place. std::optional fusedProducer = tileAndFuseProducerOfSlice(rewriter, candidateSliceOp, forLoops); if (!fusedProducer) continue; // Check if the fused producer has other uses that require the value // to be yielded from within the tiled loop. OpResult untiledProducer = fusedProducer->origProducer; if (llvm::any_of(untiledProducer.getUsers(), [&](Operation *user) { return !isIgnoredUser(user, forLoops.front()); })) { yieldReplacementForFusedProducer(rewriter, candidateSliceOp, fusedProducer.value(), forLoops); yieldedValuesToOrigValues.push_back(untiledProducer); } // Add more fusion candidates to the worklist. if (auto fusedProducerOp = fusedProducer->tiledAndFusedProducer.getDefiningOp()) addCandidateSlices(fusedProducerOp, candidates); } scf::ForOp outermostLoop = forLoops.front(); for (auto [index, origVal] : llvm::enumerate(yieldedValuesToOrigValues)) { Value replacement = outermostLoop.getResult(index); rewriter.replaceUsesWithIf(origVal, replacement, [&](OpOperand &use) { return !isIgnoredUser(use.getOwner(), outermostLoop); }); } rewriter.eraseOp(rootOp); filter.replaceTransformationFilter(rewriter, tilingResult->tiledOps.back()); return success(); } private: /// Starting from `op` walk all operands backwards to find all /// potentially fusable operations, i.e. operations that implement /// the `TilingInterface`. llvm::SmallDenseSet collectTiledAndFusedOps(Operation *op) const { SmallVector worklist; llvm::SmallDenseSet producers; worklist.push_back(op); producers.insert(op); while (!worklist.empty()) { Operation *current = worklist.pop_back_val(); for (OpOperand &operand : current->getOpOperands()) { Operation *producer = operand.get().getDefiningOp(); if (!producer || !isa(producer) || producers.count(producer)) continue; worklist.push_back(producer); producers.insert(producer); } } return producers; } scf::SCFTilingOptions options; TransformationFilter filter; }; /// Pattern to lower operations that implement the `TilingInterface` to /// loops/scalar IR using `scf.for`. struct LowerToLoopsUsingSCFForOp : public OpInterfaceRewritePattern { using OpInterfaceRewritePattern::OpInterfaceRewritePattern; /// `matchAndRewrite` implementation that returns the significant transformed /// pieces of IR. LogicalResult matchAndRewrite(TilingInterface op, PatternRewriter &rewriter) const override { FailureOr> loops = scf::lowerToLoopsUsingSCFForOp(rewriter, op); if (failed(loops)) return rewriter.notifyMatchFailure(op, "failed to lower to loops"); rewriter.eraseOp(op); return loops; } }; /// Test pass for testing the use of `TilingInterface`. struct TestTilingInterfacePass : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestTilingInterfacePass) TestTilingInterfacePass() = default; TestTilingInterfacePass(const TestTilingInterfacePass &pass) : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); linalg::registerTilingInterfaceExternalModels(registry); tensor::registerTilingInterfaceExternalModels(registry); } StringRef getArgument() const final { return "test-tiling-interface"; } StringRef getDescription() const final { return "Test tiling using TilingInterface"; } Option testTiling{ *this, "tile-using-scf-for", llvm::cl::desc( "Test tiling using TilingInterface with scf.for operations"), llvm::cl::init(false)}; Option testTilingForAll{ *this, "tile-using-scf-forall", llvm::cl::desc( "Test tiling using TilingInterface with scf.forall operations"), llvm::cl::init(false)}; Option testTileConsumerFuseAndYieldProducer{ *this, "tile-consumer-fuse-and-yield-producer-using-scf-for", llvm::cl::desc( "Test tile and fuse transformation while yielding fused producer " "replacements using TilingInterface with scf.for operations"), llvm::cl::init(false)}; Option testTileConsumerAndFuseProducer{ *this, "tile-consumer-and-fuse-producer-using-scf-for", llvm::cl::desc("Test tile and fuse transformation using TilingInterface " "with scf.for operations"), llvm::cl::init(false)}; Option testLoweringToScalar{ *this, "lower-to-scalar-using-scf-for", llvm::cl::desc("Test lowering to scalar implementation using " "TilingInterface with scf.for operations"), llvm::cl::init(false)}; void runOnOperation() override; private: void addTestPatterns(MLIRContext *context, RewritePatternSet &patterns); }; } // namespace static void addPatternForTiling(MLIRContext *context, RewritePatternSet &patterns, StringRef filterName, ArrayRef tileSizes, ArrayRef interchange = {}) { scf::SCFTilingOptions tilingOptions; SmallVector tileSizesOfr = getAsIndexOpFoldResult(context, tileSizes); tilingOptions.setTileSizes(tileSizesOfr).setInterchange(interchange); TransformationFilter filter(StringAttr::get(context, filterName), StringAttr::get(context, "tiled")); patterns.add(context, tilingOptions, filter); } static void addPatternForTilingUsingForall( MLIRContext *context, RewritePatternSet &patterns, StringRef filterName, ArrayRef tileSizes, ArrayRef mapping = {}, ArrayRef interchange = {}) { scf::SCFTilingOptions tilingOptions; SmallVector tileSizesOfr = getAsIndexOpFoldResult(context, tileSizes); tilingOptions.setTileSizes(tileSizesOfr).setInterchange(interchange); tilingOptions.setMapping(mapping); TransformationFilter filter(StringAttr::get(context, filterName), StringAttr::get(context, "tiled")); patterns.add(context, tilingOptions, filter); } static void addPatternForTileFuseAndYield(MLIRContext *context, RewritePatternSet &patterns, StringRef filterName, ArrayRef tileSizes, ArrayRef interchange = {}) { scf::SCFTilingOptions tilingOptions; SmallVector tileSizesOfr = getAsIndexOpFoldResult(context, tileSizes); tilingOptions.setTileSizes(tileSizesOfr).setInterchange(interchange); TransformationFilter filter(StringAttr::get(context, filterName), StringAttr::get(context, "tiled")); patterns.add( context, tilingOptions, filter); } static void addPatternForTileAndFuse(MLIRContext *context, RewritePatternSet &patterns, StringRef filterName, ArrayRef tileSizes, ArrayRef interchange = {}) { scf::SCFTileAndFuseOptions tileAndFuseOptions; SmallVector tileSizesOfr = getAsIndexOpFoldResult(context, tileSizes); tileAndFuseOptions.tilingOptions.setTileSizes(tileSizesOfr) .setInterchange(interchange); TransformationFilter filter(StringAttr::get(context, filterName), StringAttr::get(context, "tiled")); patterns.add( context, tileAndFuseOptions, filter); } void TestTilingInterfacePass::addTestPatterns(MLIRContext *context, RewritePatternSet &patterns) { if (testTiling) { // 1. Tiling M and N dims of `linalg.matmul` on tensors. addPatternForTiling(context, patterns, "simple_gemm", {10, 20}); // 2. Tiling M, N and K of `linalg.matmul` on buffers. addPatternForTiling(context, patterns, "simple_gemm_memref", {10, 20, 30}); // 3. Tiling 3D parallel generic op which implements a transpose addPatternForTiling(context, patterns, "parallel_generic_transpose", {10, 0, 20}); // 4. Tiling 2D conv op. addPatternForTiling(context, patterns, "simple_conv", {0, 0, 0, 0, 10, 20, 30}); // 5. Tiling a simple op with `linalg.index` inside. addPatternForTiling(context, patterns, "indexed_semantics", {10, 20}); // 6. Tiling + interchange of an operation addPatternForTiling(context, patterns, "gemm_interchange", {10, 20, 30}, {1, 2, 0}); // 7. Tiling for 2D pad tensor operations. addPatternForTiling(context, patterns, "pad_2dtiling", {2, 3}); // 8. Tiling inner dimension of 2d pad tensor operations. addPatternForTiling(context, patterns, "pad_inner_tiling", {0, 3}); // 9. Tiling inner dimension of 2d pad tensor operations. addPatternForTiling(context, patterns, "pad_outer_tiling", {2, 3}); // 10. Tiling M and N dims of `linalg.copy` on memrefs. addPatternForTiling(context, patterns, "simple_copy_memref", {10, 20}); return; } if (testTilingForAll) { // 1. Tiling M and N dims of `linalg.matmul` on tensors. addPatternForTilingUsingForall( context, patterns, "simple_gemm", {10, 20}, {gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimY), gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimX)}); // 2. Tiling 3D parallel generic op which implements a transpose. addPatternForTilingUsingForall(context, patterns, "parallel_generic_transpose", {10, 0, 20}); // 3. Tiling 2D conv op. addPatternForTilingUsingForall(context, patterns, "simple_conv", {0, 0, 0, 0, 10, 20, 30}); // 4. Tiling a simple op with `linalg.index` inside. addPatternForTilingUsingForall(context, patterns, "indexed_semantics", {10, 20}); return; } if (testTileConsumerAndFuseProducer) { // 1. Tile and fuse of gemm with fill producer and bias-add consumer. addPatternForTileAndFuse(context, patterns, "fusion", {10, 20}); // 2. Tile and fuse sequence of GEMMs, by fusing only along M. addPatternForTileAndFuse(context, patterns, "gemm_fusion", {10}); // 3. Tile and fuse gemm with consumer + interchange of tiled loops. addPatternForTileAndFuse(context, patterns, "gemm_interchange_fusion", {10, 20}, {1, 0}); // 4. Tile and fuse matmul + transpose(matmul). Will introduce redundant // computations. addPatternForTileAndFuse(context, patterns, "gemm_plus_gemm_fusion", {10, 20}); // 5. Tile and fuse a sequence of GEMMs by tiling and fusing only along M // dimension. addPatternForTileAndFuse(context, patterns, "gemm_sequence_fusion", {10}); // 6. Fusion of back-to-back-reduction ops addPatternForTileAndFuse(context, patterns, "reduction_sequence_fusion", {10}); return; } if (testTileConsumerFuseAndYieldProducer) { // 1. Fusion of back-to-back-reduction ops addPatternForTileFuseAndYield(context, patterns, "gemm_sequence_fusion_and_yield", {10}); return; } if (testLoweringToScalar) { patterns.add(context); } } void TestTilingInterfacePass::runOnOperation() { MLIRContext *context = &getContext(); RewritePatternSet tilingPatterns(context); addTestPatterns(context, tilingPatterns); if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(tilingPatterns)))) return signalPassFailure(); } namespace mlir { namespace test { void registerTestTilingInterface() { PassRegistration(); } } // namespace test } // namespace mlir