| author | Uday Bondhugula <uday@polymagelabs.com> | 2024-02-29 06:32:42 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-29 06:32:42 +0530 |
| commit | 2679d3793b264b2884cf5c9492e3311f08f41de3 | |
| tree | ef9c1948c2b5ceccc9e616fe123c5b071c4d2fef | |
| parent | 1a0986f0f7a18cef78852e91e73ec577ea05d8c4 | |
[MLIR][Affine] Add test pass for affine isContiguousAccess (#82923)
`isContiguousAccess` is an important affine analysis utility, but it is only
tested very indirectly through passes like vectorization and is not exposed.
Expose it and add a test pass for it that makes it easier and feasible to
write test cases. This is especially needed since the utility can be
significantly enhanced in power, and a test pass is needed to exercise it
directly.

In the future, this pass can be used to test the utility on invariant
accesses as well.
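As a usage illustration (not part of this patch), here is a minimal sketch of how the now-exposed utility might be called from other analysis code, following the pattern of the test pass added below. `reportReadContiguity` is a hypothetical helper name, it handles only reads, and the includes are assumed to match the headers this patch touches.

```cpp
// Hypothetical sketch: report, for a single affine read, which of its
// enclosing affine.for IVs it is contiguous along.
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"

using namespace mlir;
using namespace mlir::affine;

static void reportReadContiguity(AffineReadOpInterface readOp) {
  // Collect the affine.for ops enclosing this access, outermost first.
  SmallVector<AffineForOp> enclosingLoops;
  getAffineForIVs(*readOp.getOperation(), &enclosingLoops);
  for (unsigned d = 0, e = enclosingLoops.size(); d < e; ++d) {
    int memRefDim;
    // On success, `memRefDim` identifies the unique varying memref dimension,
    // or -1 if the access is invariant along this IV; the test pass treats
    // memRefDim == 0 (variation only in the innermost dimension) as
    // contiguous.
    if (isContiguousAccess(enclosingLoops[d].getInductionVar(), readOp,
                           &memRefDim) &&
        memRefDim == 0)
      readOp->emitRemark("contiguous along loop ") << d;
  }
}
```

The same call works for affine writes through the `AffineWriteOpInterface` instantiation added in `LoopAnalysis.cpp`.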
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h | 20 |
| -rw-r--r-- | mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp | 54 |
| -rw-r--r-- | mlir/test/Dialect/Affine/access-analysis.mlir | 67 |
| -rw-r--r-- | mlir/test/lib/Dialect/Affine/CMakeLists.txt | 2 |
| -rw-r--r-- | mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp | 83 |
| -rw-r--r-- | mlir/tools/mlir-opt/mlir-opt.cpp | 2 |
6 files changed, 197 insertions, 31 deletions
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
index 92f3d5a..1f64b57 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
@@ -60,6 +60,26 @@ uint64_t getLargestDivisorOfTripCount(AffineForOp forOp);
 DenseSet<Value, DenseMapInfo<Value>>
 getInvariantAccesses(Value iv, ArrayRef<Value> indices);
 
+/// Given:
+///   1. an induction variable `iv` of type AffineForOp;
+///   2. a `memoryOp` of type const LoadOp& or const StoreOp&;
+/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
+/// is defined as either invariant or varying only along a unique MemRef dim.
+/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
+/// convey the memRef access is invariant along `iv`).
+///
+/// Prerequisites:
+///   1. `memRefDim` ~= nullptr;
+///   2. `iv` of the proper type;
+///   3. the MemRef accessed by `memoryOp` has no layout map or at most an
+///      identity layout map.
+///
+/// Currently only supports no layout map or identity layout map in the memref.
+/// Returns false if the memref has a non-identity layoutMap. This behavior is
+/// conservative.
+template <typename LoadOrStoreOp>
+bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp, int *memRefDim);
+
 using VectorizableLoopFun = std::function<bool(AffineForOp)>;
 
 /// Checks whether the loop is structurally vectorizable; i.e.:
diff --git a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
index e645afe..fc0515b 100644
--- a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
@@ -195,43 +195,25 @@ DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
   return res;
 }
 
-/// Given:
-///   1. an induction variable `iv` of type AffineForOp;
-///   2. a `memoryOp` of type const LoadOp& or const StoreOp&;
-/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
-/// is defined as either invariant or varying only along a unique MemRef dim.
-/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
-/// convey the memRef access is invariant along `iv`).
-///
-/// Prerequisites:
-///   1. `memRefDim` ~= nullptr;
-///   2. `iv` of the proper type;
-///   3. the MemRef accessed by `memoryOp` has no layout map or at most an
-///      identity layout map.
-///
-/// Currently only supports no layoutMap or identity layoutMap in the MemRef.
-/// Returns false if the MemRef has a non-identity layoutMap or more than 1
-/// layoutMap. This is conservative.
-///
-// TODO: check strides.
+// TODO: check access stride.
 template <typename LoadOrStoreOp>
-static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
-                               int *memRefDim) {
-  static_assert(
-      llvm::is_one_of<LoadOrStoreOp, AffineLoadOp, AffineStoreOp>::value,
-      "Must be called on either LoadOp or StoreOp");
+bool mlir::affine::isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
+                                      int *memRefDim) {
+  static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
+                                AffineWriteOpInterface>::value,
+                "Must be called on either an affine read or write op");
   assert(memRefDim && "memRefDim == nullptr");
   auto memRefType = memoryOp.getMemRefType();
   if (!memRefType.getLayout().isIdentity())
-    return memoryOp.emitError("NYI: non-trivial layoutMap"), false;
+    return memoryOp.emitError("NYI: non-trivial layout map"), false;
   int uniqueVaryingIndexAlongIv = -1;
   auto accessMap = memoryOp.getAffineMap();
   SmallVector<Value, 4> mapOperands(memoryOp.getMapOperands());
   unsigned numDims = accessMap.getNumDims();
   for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
-    // Gather map operands used result expr 'i' in 'exprOperands'.
+    // Gather map operands used in result expr 'i' in 'exprOperands'.
     SmallVector<Value, 4> exprOperands;
     auto resultExpr = accessMap.getResult(i);
     resultExpr.walk([&](AffineExpr expr) {
@@ -241,7 +223,7 @@ static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
         exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
     });
     // Check access invariance of each operand in 'exprOperands'.
-    for (auto exprOperand : exprOperands) {
+    for (Value exprOperand : exprOperands) {
       if (!isAccessIndexInvariant(iv, exprOperand)) {
         if (uniqueVaryingIndexAlongIv != -1) {
           // 2+ varying indices -> do not vectorize along iv.
@@ -259,6 +241,13 @@ static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
   return true;
 }
 
+template bool mlir::affine::isContiguousAccess(Value iv,
+                                               AffineReadOpInterface loadOp,
+                                               int *memRefDim);
+template bool mlir::affine::isContiguousAccess(Value iv,
+                                               AffineWriteOpInterface loadOp,
+                                               int *memRefDim);
+
 template <typename LoadOrStoreOp>
 static bool isVectorElement(LoadOrStoreOp memoryOp) {
   auto memRefType = memoryOp.getMemRefType();
@@ -344,10 +333,13 @@ bool mlir::affine::isVectorizableLoopBody(
     auto load = dyn_cast<AffineLoadOp>(op);
     auto store = dyn_cast<AffineStoreOp>(op);
     int thisOpMemRefDim = -1;
-    bool isContiguous = load ? isContiguousAccess(loop.getInductionVar(), load,
-                                                  &thisOpMemRefDim)
-                             : isContiguousAccess(loop.getInductionVar(), store,
-                                                  &thisOpMemRefDim);
+    bool isContiguous =
+        load ? isContiguousAccess(loop.getInductionVar(),
+                                  cast<AffineReadOpInterface>(*load),
+                                  &thisOpMemRefDim)
+             : isContiguousAccess(loop.getInductionVar(),
+                                  cast<AffineWriteOpInterface>(*store),
+                                  &thisOpMemRefDim);
     if (thisOpMemRefDim != -1) {
       // If memory accesses vary across different dimensions then the loop is
       // not vectorizable.
diff --git a/mlir/test/Dialect/Affine/access-analysis.mlir b/mlir/test/Dialect/Affine/access-analysis.mlir
new file mode 100644
index 0000000..68310b9
--- /dev/null
+++ b/mlir/test/Dialect/Affine/access-analysis.mlir
@@ -0,0 +1,67 @@
+// RUN: mlir-opt %s -split-input-file -test-affine-access-analysis -verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: func @loop_1d
+func.func @loop_1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
+  %c0 = arith.constant 0 : index
+  %M = memref.dim %A, %c0 : memref<?x?xf32>
+  affine.for %i = 0 to %M {
+    affine.for %j = 0 to %M {
+      affine.load %A[%c0, %i] : memref<?x?xf32>
+      // expected-remark@above {{contiguous along loop 0}}
+      affine.load %A[%c0, 8 * %i + %j] : memref<?x?xf32>
+      // expected-remark@above {{contiguous along loop 1}}
+      // Note/FIXME: access stride isn't being checked.
+      // expected-remark@-3 {{contiguous along loop 0}}
+
+      // These are all non-contiguous along both loops. Nothing is emitted.
+      affine.load %A[%i, %c0] : memref<?x?xf32>
+      // Note/FIXME: access stride isn't being checked.
+      affine.load %A[%i, 8 * %j] : memref<?x?xf32>
+      // expected-remark@above {{contiguous along loop 1}}
+      affine.load %A[%j, 4 * %i] : memref<?x?xf32>
+      // expected-remark@above {{contiguous along loop 0}}
+    }
+  }
+  return
+}
+
+// -----
+
+#map = affine_map<(d0) -> (d0 * 16)>
+#map1 = affine_map<(d0) -> (d0 * 16 + 16)>
+#map2 = affine_map<(d0) -> (d0)>
+#map3 = affine_map<(d0) -> (d0 + 1)>
+
+func.func @tiled(%arg0: memref<*xf32>) {
+  %alloc = memref.alloc() {alignment = 64 : i64} : memref<1x224x224x64xf32>
+  %cast = memref.cast %arg0 : memref<*xf32> to memref<64xf32>
+  affine.for %arg1 = 0 to 4 {
+    affine.for %arg2 = 0 to 224 {
+      affine.for %arg3 = 0 to 14 {
+        %alloc_0 = memref.alloc() : memref<1x16x1x16xf32>
+        affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
+          affine.for %arg5 = #map(%arg3) to #map1(%arg3) {
+            %0 = affine.load %cast[%arg4] : memref<64xf32>
+            // expected-remark@above {{contiguous along loop 3}}
+            affine.store %0, %alloc_0[0, %arg1 * -16 + %arg4, 0, %arg3 * -16 + %arg5] : memref<1x16x1x16xf32>
+            // expected-remark@above {{contiguous along loop 4}}
+            // expected-remark@above {{contiguous along loop 2}}
+          }
+        }
+        affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
+          affine.for %arg5 = #map2(%arg2) to #map3(%arg2) {
+            affine.for %arg6 = #map(%arg3) to #map1(%arg3) {
+              %0 = affine.load %alloc_0[0, %arg1 * -16 + %arg4, -%arg2 + %arg5, %arg3 * -16 + %arg6] : memref<1x16x1x16xf32>
+              // expected-remark@above {{contiguous along loop 5}}
+              // expected-remark@above {{contiguous along loop 2}}
+              affine.store %0, %alloc[0, %arg5, %arg6, %arg4] : memref<1x224x224x64xf32>
+              // expected-remark@above {{contiguous along loop 3}}
+            }
+          }
+        }
+        memref.dealloc %alloc_0 : memref<1x16x1x16xf32>
+      }
+    }
+  }
+  return
+}
diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
index af9f312..14960a4 100644
--- a/mlir/test/lib/Dialect/Affine/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
@@ -3,6 +3,7 @@ add_mlir_library(MLIRAffineTransformsTestPasses
   TestAffineDataCopy.cpp
   TestAffineLoopUnswitching.cpp
   TestAffineLoopParametricTiling.cpp
+  TestAccessAnalysis.cpp
   TestDecomposeAffineOps.cpp
   TestReifyValueBounds.cpp
   TestLoopFusion.cpp
@@ -21,6 +22,7 @@ add_mlir_library(MLIRAffineTransformsTestPasses
 
   LINK_LIBS PUBLIC
   MLIRArithTransforms
+  MLIRAffineAnalysis
   MLIRAffineTransforms
   MLIRAffineUtils
   MLIRIR
diff --git a/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp b/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp
new file mode 100644
index 0000000..b380462
--- /dev/null
+++ b/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp
@@ -0,0 +1,83 @@
+//===- TestAccessAnalysis.cpp - Test affine access analysis utility ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to test affine access analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
+#include "mlir/Dialect/Affine/Analysis/Utils.h"
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/Pass.h"
+
+#define PASS_NAME "test-affine-access-analysis"
+
+using namespace mlir;
+using namespace mlir::affine;
+
+namespace {
+
+struct TestAccessAnalysis
+    : public PassWrapper<TestAccessAnalysis, OperationPass<func::FuncOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAccessAnalysis)
+
+  StringRef getArgument() const final { return PASS_NAME; }
+  StringRef getDescription() const final {
+    return "Tests affine memory access analysis utility";
+  }
+
+  void runOnOperation() override;
+};
+
+} // namespace
+
+/// Gathers all affine load/store ops in loop nest rooted at 'forOp' into
+/// 'loadAndStoreOps'.
+static void
+gatherLoadsAndStores(AffineForOp forOp,
+                     SmallVectorImpl<Operation *> &loadAndStoreOps) {
+  forOp.walk([&](Operation *op) {
+    if (isa<AffineReadOpInterface, AffineWriteOpInterface>(op))
+      loadAndStoreOps.push_back(op);
+  });
+}
+
+void TestAccessAnalysis::runOnOperation() {
+  SmallVector<Operation *> loadStores;
+  SmallVector<AffineForOp> enclosingOps;
+  // Go over all top-level affine.for ops and test each contained affine
+  // access's contiguity along every surrounding loop IV.
+  for (auto forOp : getOperation().getOps<AffineForOp>()) {
+    loadStores.clear();
+    gatherLoadsAndStores(forOp, loadStores);
+    for (Operation *memOp : loadStores) {
+      enclosingOps.clear();
+      getAffineForIVs(*memOp, &enclosingOps);
+      for (unsigned d = 0, e = enclosingOps.size(); d < e; d++) {
+        int memRefDim;
+        bool isContiguous;
+        if (auto read = dyn_cast<AffineReadOpInterface>(memOp)) {
+          isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
+                                            read, &memRefDim);
+        } else {
+          isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
+                                            cast<AffineWriteOpInterface>(memOp),
+                                            &memRefDim);
+        }
+        if (isContiguous && memRefDim == 0)
+          memOp->emitRemark("contiguous along loop ") << d << '\n';
+      }
+    }
+  }
+}
+
+namespace mlir {
+void registerTestAffineAccessAnalysisPass() {
+  PassRegistration<TestAccessAnalysis>();
+}
+} // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index 4dfa05c..0ba1a3a 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -43,6 +43,7 @@ void registerSliceAnalysisTestPass();
 void registerSymbolTestPasses();
 void registerRegionTestPasses();
 void registerTestAffineDataCopyPass();
+void registerTestAffineAccessAnalysisPass();
 void registerTestAffineReifyValueBoundsPass();
 void registerTestAffineLoopUnswitchingPass();
 void registerTestAffineWalk();
@@ -169,6 +170,7 @@ void registerTestPasses() {
   registerSymbolTestPasses();
   registerRegionTestPasses();
   registerTestAffineDataCopyPass();
+  registerTestAffineAccessAnalysisPass();
   registerTestAffineLoopUnswitchingPass();
   registerTestAffineReifyValueBoundsPass();
   registerTestAffineWalk();