author     Nicolas Vasilache <nicolas.vasilache@gmail.com>   2021-03-24 11:24:22 +0000
committer  Nicolas Vasilache <nicolas.vasilache@gmail.com>   2021-03-24 11:51:28 +0000
commit     7716e5535c6b248b5faabd2d1af01415a78da8d7 (patch)
tree       1294d00a922a1898681d8be89870543bf64ca511
parent     e9015bd59519e205c2205fa413c8af7e677cc65d (diff)
[mlir] Fixes to hoist padding
Fix the BlockAndValueMapping update that was missing entries for scf.for op's blockIterArgs.
Skip cloning subtensors of the padded tensor as the logic for these is separate.
Add a filter to drop side-effecting ops.
Tests are beefed up to verify the IR is sound in all hoisting configurations for 2-level 3-D tiled matmul.
Differential Revision: https://reviews.llvm.org/D99255
-rw-r--r--  mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp    | 14
-rw-r--r--  mlir/test/Dialect/Linalg/hoist-padding.mlir        | 73
-rw-r--r--  mlir/test/lib/Transforms/TestLinalgTransforms.cpp  | 12
3 files changed, 90 insertions, 9 deletions
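
For quick orientation before the full diff, here is a condensed C++ sketch of the two fixes described in the commit message, assembled from the Hoisting.cpp hunks below. It is an illustrative excerpt, not a standalone compilable unit: it assumes the enclosing `hoistPaddingOnTensors` scope (`b`, `bvm`, `packedTensor`, `backwardSlice`, `forOp`, `clonedForOp`) that the real function provides.

```cpp
// Condensed from the Hoisting.cpp hunks below; assumes the enclosing
// hoistPaddingOnTensors() scope (b, bvm, packedTensor, forOp, ...).

// (1) While cloning the backward slice, skip subtensors of the packed
//     tensor (those are rewritten separately) and refuse to clone any op
//     with memory side effects.
for (Operation *op : backwardSlice) {
  if (auto subTensor = dyn_cast<SubTensorOp>(op))
    if (bvm.lookupOrDefault(subTensor.source()) == packedTensor)
      continue;
  auto effects = dyn_cast<MemoryEffectOpInterface>(op);
  bool hasNoEffects = !effects || effects.hasNoEffect();
  if (hasNoEffects &&
      (op->getNumRegions() == 0 || isa<linalg::PadTensorOp>(op)))
    b.clone(*op, bvm);
  // scf.for ops are handled separately by cloning them explicitly (below).
}

// (2) When an scf.for is cloned, map not only its induction variable but
//     also its region iter_args and results, so later lookups through the
//     BlockAndValueMapping resolve to values of the cloned loop.
bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
bvm.map(forOp.getResults(), clonedForOp.getResults());
```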
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 3baf9b4..b4a2182c 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -793,7 +793,15 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
   backwardSlice.insert(padTensorOp);
   // Stack step 1. iteratively clone loops and push `packedTensor`.
   for (Operation *op : backwardSlice) {
-    if (op->getNumRegions() == 0 || isa<linalg::PadTensorOp>(op)) {
+    // Specifically sit out in the subtensor(packedTensor) case: this is the
+    // piece we seek to replace.
+    if (auto subTensor = dyn_cast<SubTensorOp>(op))
+      if (bvm.lookupOrDefault(subTensor.source()) == packedTensor)
+        continue;
+    auto effects = dyn_cast<MemoryEffectOpInterface>(op);
+    bool hasNoEffects = !effects || effects.hasNoEffect();
+    if (hasNoEffects &&
+        (op->getNumRegions() == 0 || isa<linalg::PadTensorOp>(op))) {
       b.clone(*op, bvm);
       continue;
     }
@@ -808,8 +816,10 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
     auto clonedForOp =
         b.create<scf::ForOp>(loc, bvm.lookupOrDefault(forOp.lowerBound()),
                              bvm.lookupOrDefault(forOp.upperBound()),
                              bvm.lookupOrDefault(forOp.step()), packedTensor);
-
+    // Map the induction var, region args and results to the `clonedForOp`.
     bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
+    bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
+    bvm.map(forOp.getResults(), clonedForOp.getResults());
     assert(clonedForOp->getNumRegions() == 1);
     clonedLoopIvs.push_back(clonedForOp.getInductionVar());
diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir
index 2459d2a..248aa64 100644
--- a/mlir/test/Dialect/Linalg/hoist-padding.mlir
+++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir
@@ -1,4 +1,13 @@
-// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding-2-level -canonicalize | FileCheck %s
+// Specific structural checks are performed on 2-level hoisting
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=2 -canonicalize | FileCheck %s
+
+// IR verification is performed on [0-6]-level hoisting
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=0 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=1 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=3 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=4 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=5 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=6 | FileCheck %s --check-prefix=VERIFIER-ONLY
 
 // CHECK-DAG: #[[$DIV3:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 3)>
 // CHECK-DAG: #[[$DIV4:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 4)>
@@ -14,6 +23,7 @@
 // CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor
 // CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor
 // CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor
+// VERIFIER-ONLY-LABEL: func @matmul_tensors
 func @matmul_tensors(
   %arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)
     -> tensor<?x?xf32>
@@ -140,6 +150,7 @@ func @matmul_tensors(
 #map2 = affine_map<(d0, d1) -> (2, d0 - d1)>
 
 // CHECK-LABEL: func @dot
+// VERIFIER-ONLY-LABEL: func @dot
 func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
     -> tensor<f32>
 {
@@ -217,3 +228,63 @@ func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
   }
   return %4 : tensor<f32>
 }
+
+// -----
+
+// CHECK-LABEL: func @matmul_2d_tiling
+// VERIFIER-ONLY-LABEL: func @matmul_2d_tiling
+func @matmul_2d_tiling(%arg0: tensor<32x128xf32>, %arg1: tensor<128x64xf32>, %arg2: tensor<32x64xf32>) -> tensor<32x64xf32> {
+  %c128 = constant 128 : index
+  %c64 = constant 64 : index
+  %c32 = constant 32 : index
+  %c16 = constant 16 : index
+  %cst = constant 0.000000e+00 : f32
+  %c2 = constant 2 : index
+  %c4 = constant 4 : index
+  %c0 = constant 0 : index
+  %1 = scf.for %arg3 = %c0 to %c32 step %c16 iter_args(%arg4 = %arg2) -> (tensor<32x64xf32>) {
+    %2 = scf.for %arg5 = %c0 to %c64 step %c32 iter_args(%arg6 = %arg4) -> (tensor<32x64xf32>) {
+      %3 = scf.for %arg7 = %c0 to %c128 step %c32 iter_args(%arg8 = %arg6) -> (tensor<32x64xf32>) {
+        %4 = subtensor %arg0[%arg3, %arg7] [16, 32] [1, 1] : tensor<32x128xf32> to tensor<16x32xf32>
+        %5 = subtensor %arg1[%arg7, %arg5] [32, 32] [1, 1] : tensor<128x64xf32> to tensor<32x32xf32>
+        %6 = subtensor %arg8[%arg3, %arg5] [16, 32] [1, 1] : tensor<32x64xf32> to tensor<16x32xf32>
+        %7 = scf.for %arg9 = %c0 to %c16 step %c2 iter_args(%arg10 = %6) -> (tensor<16x32xf32>) {
+          %10 = scf.for %arg11 = %c0 to %c32 step %c4 iter_args(%arg12 = %arg10) -> (tensor<16x32xf32>) {
+            %11 = scf.for %arg13 = %c0 to %c32 step %c16 iter_args(%arg14 = %arg12) -> (tensor<16x32xf32>) {
+              %12 = subtensor %4[%arg9, %arg13] [2, 16] [1, 1] : tensor<16x32xf32> to tensor<2x16xf32>
+              %13 = tensor.cast %12 : tensor<2x16xf32> to tensor<?x?xf32>
+              %14 = subtensor %5[%arg13, %arg11] [16, 4] [1, 1] : tensor<32x32xf32> to tensor<16x4xf32>
+              %15 = tensor.cast %14 : tensor<16x4xf32> to tensor<?x?xf32>
+              %16 = subtensor %arg14[%arg9, %arg11] [2, 4] [1, 1] : tensor<16x32xf32> to tensor<2x4xf32>
+              %17 = tensor.cast %16 : tensor<2x4xf32> to tensor<?x?xf32>
+              %18 = linalg.pad_tensor %13 low[%c0, %c0] high[%c0, %c0] {
+              ^bb0(%arg15: index, %arg16: index):  // no predecessors
+                linalg.yield %cst : f32
+              } : tensor<?x?xf32> to tensor<2x16xf32>
+              %19 = linalg.pad_tensor %15 low[%c0, %c0] high[%c0, %c0] {
+              ^bb0(%arg15: index, %arg16: index):  // no predecessors
+                linalg.yield %cst : f32
+              } : tensor<?x?xf32> to tensor<16x4xf32>
+              %20 = linalg.pad_tensor %17 low[%c0, %c0] high[%c0, %c0] {
+              ^bb0(%arg15: index, %arg16: index):  // no predecessors
+                linalg.yield %cst : f32
+              } : tensor<?x?xf32> to tensor<2x4xf32>
+              %21 = linalg.matmul ins(%18, %19 : tensor<2x16xf32>, tensor<16x4xf32>) outs(%20 : tensor<2x4xf32>) -> tensor<2x4xf32>
+              %22 = tensor.cast %21 : tensor<2x4xf32> to tensor<?x?xf32>
+              %23 = subtensor_insert %22 into %arg14[%arg9, %arg11] [%c2, %c4] [1, 1] : tensor<?x?xf32> into tensor<16x32xf32>
+              scf.yield %23 : tensor<16x32xf32>
+            }
+            scf.yield %11 : tensor<16x32xf32>
+          }
+          scf.yield %10 : tensor<16x32xf32>
+        }
+        %8 = tensor.cast %7 : tensor<16x32xf32> to tensor<?x?xf32>
+        %9 = subtensor_insert %8 into %arg8[%arg3, %arg5] [%c16, %c32] [1, 1] : tensor<?x?xf32> into tensor<32x64xf32>
+        scf.yield %9 : tensor<32x64xf32>
+      }
+      scf.yield %3 : tensor<32x64xf32>
+    }
+    scf.yield %2 : tensor<32x64xf32>
+  }
+  return %1 : tensor<32x64xf32>
+}
diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
index 276a9f7..fd8fb3b 100644
--- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
@@ -84,9 +84,9 @@ struct TestLinalgTransforms
   Option<bool> testTileAndPadPattern{
       *this, "test-tile-and-pad-pattern",
       llvm::cl::desc("Test tile and pad pattern"), llvm::cl::init(false)};
-  Option<bool> testHoistPadding2Levels{*this, "test-hoist-padding-2-level",
-                                       llvm::cl::desc("Test hoist padding"),
-                                       llvm::cl::init(false)};
+  Option<int> testHoistPadding{*this, "test-hoist-padding",
+                               llvm::cl::desc("Test hoist padding"),
+                               llvm::cl::init(0)};
 };
 } // end anonymous namespace
 
@@ -571,9 +571,9 @@ void TestLinalgTransforms::runOnFunction() {
     return applyAffineMinSCFCanonicalizationPatterns(getFunction());
   if (testTileAndPadPattern)
     return applyTileAndPadPattern(getFunction());
-  if (testHoistPadding2Levels) {
-    getFunction().walk([](linalg::PadTensorOp padTensorOp) {
-      (void)linalg::hoistPaddingOnTensors(padTensorOp, 2);
+  if (testHoistPadding) {
+    getFunction().walk([&](linalg::PadTensorOp padTensorOp) {
+      (void)linalg::hoistPaddingOnTensors(padTensorOp, testHoistPadding);
     });
   }
 }