aboutsummaryrefslogtreecommitdiff
path: root/mlir
diff options
context:
space:
mode:
Diffstat (limited to 'mlir')
-rw-r--r--mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h54
-rw-r--r--mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td40
-rw-r--r--mlir/include/mlir/Dialect/MemRef/IR/MemRef.h1
-rw-r--r--mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td2
-rw-r--r--mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td36
-rw-r--r--mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td80
-rw-r--r--mlir/include/mlir/Dialect/Shard/IR/ShardOps.td2
-rw-r--r--mlir/include/mlir/Dialect/Vector/IR/VectorOps.td1
-rw-r--r--mlir/include/mlir/IR/Remarks.h140
-rw-r--r--mlir/include/mlir/Interfaces/CMakeLists.txt1
-rw-r--r--mlir/include/mlir/Interfaces/InferIntRangeInterface.h12
-rw-r--r--mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h145
-rw-r--r--mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td45
-rw-r--r--mlir/include/mlir/Remark/RemarkStreamer.h1
-rw-r--r--mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h9
-rw-r--r--mlir/lib/Analysis/CMakeLists.txt2
-rw-r--r--mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp127
-rw-r--r--mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp32
-rw-r--r--mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp5
-rw-r--r--mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp54
-rw-r--r--mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp3
-rw-r--r--mlir/lib/Dialect/MemRef/IR/CMakeLists.txt3
-rw-r--r--mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp59
-rw-r--r--mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp261
-rw-r--r--mlir/lib/Dialect/Vector/IR/VectorOps.cpp105
-rw-r--r--mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp62
-rw-r--r--mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp52
-rw-r--r--mlir/lib/IR/Diagnostics.cpp6
-rw-r--r--mlir/lib/IR/MLIRContext.cpp15
-rw-r--r--mlir/lib/IR/Remarks.cpp57
-rw-r--r--mlir/lib/Interfaces/CMakeLists.txt16
-rw-r--r--mlir/lib/Interfaces/InferIntRangeInterface.cpp19
-rw-r--r--mlir/lib/Interfaces/InferStridedMetadataInterface.cpp36
-rw-r--r--mlir/lib/Remark/RemarkStreamer.cpp4
-rw-r--r--mlir/lib/Target/LLVMIR/DebugImporter.cpp4
-rw-r--r--mlir/lib/Target/Wasm/TranslateFromWasm.cpp2
-rw-r--r--mlir/lib/Tools/PDLL/Parser/Parser.cpp2
-rw-r--r--mlir/lib/Tools/mlir-opt/MlirOptMain.cpp37
-rw-r--r--mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp2
-rw-r--r--mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp2
-rw-r--r--mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp11
-rw-r--r--mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir67
-rw-r--r--mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir1
-rw-r--r--mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir29
-rw-r--r--mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir6
-rw-r--r--mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir32
-rw-r--r--mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir38
-rw-r--r--mlir/test/Dialect/Linalg/match-ops-interpreter.mlir14
-rw-r--r--mlir/test/Dialect/Linalg/one-shot-bufferize.mlir16
-rw-r--r--mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir102
-rw-r--r--mlir/test/Dialect/OpenACC/recipe-populate-private.mlir82
-rw-r--r--mlir/test/Dialect/SCF/one-shot-bufferize.mlir12
-rw-r--r--mlir/test/Dialect/Tensor/one-shot-bufferize.mlir12
-rw-r--r--mlir/test/Dialect/Vector/canonicalize/vector-step.mlir311
-rw-r--r--mlir/test/Dialect/Vector/vector-unroll-options.mlir68
-rw-r--r--mlir/test/Dialect/Vector/vector-warp-distribute.mlir19
-rw-r--r--mlir/test/Integration/GPU/SPIRV/simple_add.mlir11
-rw-r--r--mlir/test/Pass/remark-final.mlir17
-rw-r--r--mlir/test/Target/LLVMIR/Import/debug-info.ll3
-rw-r--r--mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir12
-rw-r--r--mlir/test/Target/LLVMIR/nvvmir-invalid.mlir18
-rw-r--r--mlir/test/Target/LLVMIR/nvvmir.mlir4
-rw-r--r--mlir/test/lib/Analysis/CMakeLists.txt1
-rw-r--r--mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp86
-rw-r--r--mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp26
-rw-r--r--mlir/test/lib/Dialect/OpenACC/CMakeLists.txt1
-rw-r--r--mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp6
-rw-r--r--mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp8
-rw-r--r--mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp110
-rw-r--r--mlir/test/lib/Dialect/Test/TestAttrDefs.td17
-rw-r--r--mlir/test/lib/Dialect/Test/TestAttributes.cpp18
-rw-r--r--mlir/test/lib/Dialect/Test/TestAttributes.h1
-rw-r--r--mlir/test/lib/Dialect/Test/TestDialect.h1
-rw-r--r--mlir/test/lib/Dialect/Test/TestDialect.td5
-rw-r--r--mlir/test/lib/Dialect/Test/TestOpDefs.cpp44
-rw-r--r--mlir/test/lib/Dialect/Test/TestOps.td15
-rw-r--r--mlir/test/lib/Pass/TestRemarksPass.cpp7
-rw-r--r--mlir/test/mlir-tblgen/cpp-class-comments.td10
-rw-r--r--mlir/tools/mlir-opt/mlir-opt.cpp2
-rw-r--r--mlir/tools/mlir-pdll/mlir-pdll.cpp2
-rw-r--r--mlir/tools/mlir-tblgen/EnumsGen.cpp21
-rw-r--r--mlir/tools/mlir-tblgen/OpInterfacesGen.cpp38
-rw-r--r--mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp35
-rw-r--r--mlir/unittests/IR/RemarkTest.cpp80
-rwxr-xr-xmlir/utils/generate-test-checks.py48
85 files changed, 2700 insertions, 303 deletions
diff --git a/mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h b/mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h
new file mode 100644
index 0000000..72ac247
--- /dev/null
+++ b/mlir/include/mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h
@@ -0,0 +1,54 @@
+//===- StridedMetadataRange.h - Strided metadata range analysis -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_DATAFLOW_STRIDEDMETADATARANGE_H
+#define MLIR_ANALYSIS_DATAFLOW_STRIDEDMETADATARANGE_H
+
+#include "mlir/Analysis/DataFlow/SparseAnalysis.h"
+#include "mlir/Interfaces/InferStridedMetadataInterface.h"
+
+namespace mlir {
+namespace dataflow {
+
+/// This lattice element represents the strided metadata of an SSA value.
+class StridedMetadataRangeLattice : public Lattice<StridedMetadataRange> {
+public:
+ using Lattice::Lattice;
+};
+
+/// Strided metadata range analysis determines the strided metadata ranges of
+/// SSA values using operations that define `InferStridedMetadataInterface`.
+///
+/// This analysis depends on DeadCodeAnalysis, SparseConstantPropagation, and
+/// IntegerRangeAnalysis, and will be a silent no-op if the analyses are not
+/// loaded in the same solver context.
+class StridedMetadataRangeAnalysis
+ : public SparseForwardDataFlowAnalysis<StridedMetadataRangeLattice> {
+public:
+ StridedMetadataRangeAnalysis(DataFlowSolver &solver,
+ int32_t indexBitwidth = 64);
+
+ /// At an entry point, we cannot reason about strided metadata ranges unless
+ /// the type also encodes the data. For example, a memref with static layout.
+ void setToEntryState(StridedMetadataRangeLattice *lattice) override;
+
+ /// Visit an operation. Invoke the transfer function on each operation that
+ /// implements `InferStridedMetadataInterface`.
+ LogicalResult
+ visitOperation(Operation *op,
+ ArrayRef<const StridedMetadataRangeLattice *> operands,
+ ArrayRef<StridedMetadataRangeLattice *> results) override;
+
+private:
+ /// Index bitwidth to use when operating with the int-ranges.
+ int32_t indexBitwidth = 64;
+};
+} // namespace dataflow
+} // end namespace mlir
+
+#endif // MLIR_ANALYSIS_DATAFLOW_STRIDEDMETADATARANGE_H
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 89fbeb7..d959464 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -263,6 +263,7 @@ class NVVM_PureSpecialRangeableRegisterOp<string mnemonic, list<Trait> traits =
let assemblyFormat = "(`range` $range^)? attr-dict `:` type($res)";
let llvmBuilder = baseLlvmBuilder # setRangeRetAttrCode # baseLlvmBuilderCoda;
let mlirBuilder = baseMlirBuilder # importRangeRetAttrCode # baseMlirBuilderCoda;
+ let hasVerifier = 1;
// Backwards-compatibility builder for an unspecified range.
let builders = [
@@ -279,6 +280,11 @@ class NVVM_PureSpecialRangeableRegisterOp<string mnemonic, list<Trait> traits =
SetIntRangeFn setResultRanges) {
nvvmInferResultRanges(getOperation(), getResult(), argRanges, setResultRanges);
}
+
+ // Verify the range attribute satisfies LLVM ConstantRange constructor requirements.
+ ::llvm::LogicalResult $cppClass::verify() {
+ return verifyConstantRangeAttr(getOperation(), getRange());
+ }
}];
}
@@ -1655,6 +1661,40 @@ def NVVM_ConvertFloatToTF32Op : NVVM_Op<"convert.float.to.tf32"> {
}];
}
+def NVVM_ConvertF32x2ToF4x2Op : NVVM_Op<"convert.f32x2.to.f4x2"> {
+ let summary = "Convert a pair of float inputs to f4x2";
+ let description = [{
+ This Op converts each of the given float inputs to the specified fp4 type.
+ The result `dst` is returned as an i8 type where the converted values are
+ packed such that the value converted from `a` is stored in the upper 4 bits
+ of `dst` and the value converted from `b` is stored in the lower 4 bits of
+ `dst`.
+ The `relu` attribute, when set, lowers to the '.relu' variant of
+ the cvt instruction.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cvt)
+ }];
+
+ let results = (outs I8:$dst);
+ let arguments = (ins F32:$a, F32:$b,
+ DefaultValuedAttr<BoolAttr, "false">:$relu,
+ TypeAttr:$dstTy);
+ let assemblyFormat = "$a `,` $b attr-dict `:` type($dst) `(` $dstTy `)`";
+ let hasVerifier = 1;
+
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(NVVM::ConvertF32x2ToF4x2Op op,
+ LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder);
+ }];
+
+ string llvmBuilder = [{
+ auto [intId, args] = NVVM::ConvertF32x2ToF4x2Op::getIntrinsicIDAndArgs(op, moduleTranslation, builder);
+ llvm::Value *packedI16 = createIntrinsicCall(builder, intId, args);
+ $dst = builder.CreateTruncOrBitCast(packedI16, llvm::Type::getInt8Ty(builder.getContext()));
+ }];
+}
+
def NVVM_ConvertF32x2ToF6x2Op : NVVM_Op<"convert.f32x2.to.f6x2"> {
let summary = "Convert a pair of float inputs to f6x2";
let description = [{
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
index 30f33ed..69447f7 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRef.h
@@ -17,6 +17,7 @@
#include "mlir/Interfaces/CastInterfaces.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"
#include "mlir/Interfaces/InferIntRangeInterface.h"
+#include "mlir/Interfaces/InferStridedMetadataInterface.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/MemOpInterfaces.h"
#include "mlir/Interfaces/MemorySlotInterfaces.h"
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index 89bd0f1..b39207f 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -14,6 +14,7 @@ include "mlir/Dialect/MemRef/IR/MemRefBase.td"
include "mlir/Interfaces/CastInterfaces.td"
include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/InferIntRangeInterface.td"
+include "mlir/Interfaces/InferStridedMetadataInterface.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/MemOpInterfaces.td"
include "mlir/Interfaces/MemorySlotInterfaces.td"
@@ -2085,6 +2086,7 @@ def MemRef_StoreOp : MemRef_Op<"store",
def SubViewOp : MemRef_OpWithOffsetSizesAndStrides<"subview", [
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>,
+ DeclareOpInterfaceMethods<InferStridedMetadataOpInterface>,
DeclareOpInterfaceMethods<MemorySpaceCastConsumerOpInterface>,
DeclareOpInterfaceMethods<ViewLikeOpInterface>,
AttrSizedOperandSegments,
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 77e833f..fecf81b 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -1316,6 +1316,24 @@ def OpenACC_PrivateRecipeOp
}];
let hasRegionVerifier = 1;
+
+ let extraClassDeclaration = [{
+ /// Creates a PrivateRecipeOp and populates its regions based on the
+ /// variable type as long as the type implements MappableType or
+ /// PointerLikeType interface. If a type implements both, the MappableType
+ /// API will be preferred. Returns std::nullopt if the recipe cannot be
+ /// created or populated. The builder's current insertion point will be used
+ /// and it must be a valid place for this operation to be inserted. The
+ /// `recipeName` must be a unique name to prevent "redefinition of symbol"
+ /// IR errors.
+ static std::optional<PrivateRecipeOp> createAndPopulate(
+ ::mlir::OpBuilder &builder,
+ ::mlir::Location loc,
+ ::llvm::StringRef recipeName,
+ ::mlir::Type varType,
+ ::llvm::StringRef varName = "",
+ ::mlir::ValueRange bounds = {});
+ }];
}
//===----------------------------------------------------------------------===//
@@ -1410,6 +1428,24 @@ def OpenACC_FirstprivateRecipeOp
}];
let hasRegionVerifier = 1;
+
+ let extraClassDeclaration = [{
+ /// Creates a FirstprivateRecipeOp and populates its regions based on the
+ /// variable type as long as the type implements MappableType or
+ /// PointerLikeType interface. If a type implements both, the MappableType
+ /// API will be preferred. Returns std::nullopt if the recipe cannot be
+ /// created or populated. The builder's current insertion point will be used
+ /// and it must be a valid place for this operation to be inserted. The
+ /// `recipeName` must be a unique name to prevent "redefinition of symbol"
+ /// IR errors.
+ static std::optional<FirstprivateRecipeOp> createAndPopulate(
+ ::mlir::OpBuilder &builder,
+ ::mlir::Location loc,
+ ::llvm::StringRef recipeName,
+ ::mlir::Type varType,
+ ::llvm::StringRef varName = "",
+ ::mlir::ValueRange bounds = {});
+ }];
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td
index 0d16255..6736bc8 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td
@@ -83,7 +83,15 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> {
The `originalVar` parameter is optional but enables support for dynamic
types (e.g., dynamic memrefs). When provided, implementations can extract
runtime dimension information from the original variable to create
- allocations with matching dynamic sizes.
+ allocations with matching dynamic sizes. When generating recipe bodies,
+ `originalVar` should be the block argument representing the original
+ variable in the recipe region.
+
+ The `needsFree` output parameter indicates whether the allocated memory
+ requires explicit deallocation. Implementations should set this to true
+ for heap allocations that need a matching deallocation operation (e.g.,
+ alloc) and false for stack-based allocations (e.g., alloca). During
+ recipe generation, this determines whether a destroy region is created.
Returns a Value representing the result of the allocation. If no value
is returned, it means the allocation was not successfully generated.
@@ -94,7 +102,8 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> {
"::mlir::Location":$loc,
"::llvm::StringRef":$varName,
"::mlir::Type":$varType,
- "::mlir::Value":$originalVar),
+ "::mlir::Value":$originalVar,
+ "bool &":$needsFree),
/*methodBody=*/"",
/*defaultImplementation=*/[{
return {};
@@ -102,23 +111,34 @@ def OpenACC_PointerLikeTypeInterface : TypeInterface<"PointerLikeType"> {
>,
InterfaceMethod<
/*description=*/[{
- Generates deallocation operations for the pointer-like type. It deallocates
- the instance provided.
+ Generates deallocation operations for the pointer-like type.
- The `varPtr` parameter is required and must represent an instance that was
- previously allocated. If the current type is represented in a way that it
- does not capture the pointee type, `varType` must be passed in to provide
- the necessary type information. Nothing is generated in case the allocate
- is `alloca`-like.
+ The `varToFree` parameter is required and must represent an instance
+ that was previously allocated. When generating recipe bodies, this
+ should be the block argument representing the private variable in the
+ destroy region.
+
+ The `allocRes` parameter is optional and provides the result of the
+ corresponding allocation from the init region. This allows implementations
+ to inspect the allocation operation to determine the appropriate
+ deallocation strategy. This is necessary because in recipe generation,
+ the allocation and deallocation occur in separate regions. Dialects that
+ use only one allocation type or can determine deallocation from type
+ information alone may ignore this parameter.
- Returns true if deallocation was successfully generated or successfully
- deemed as not needed to be generated, false otherwise.
+ The `varType` parameter must be provided if the current type does not
+ capture the pointee type information. No deallocation is generated for
+ stack-based allocations (e.g., alloca).
+
+ Returns true if deallocation was successfully generated or determined to
+ be unnecessary, false otherwise.
}],
/*retTy=*/"bool",
/*methodName=*/"genFree",
/*args=*/(ins "::mlir::OpBuilder &":$builder,
"::mlir::Location":$loc,
- "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varPtr,
+ "::mlir::TypedValue<::mlir::acc::PointerLikeType>":$varToFree,
+ "::mlir::Value":$allocRes,
"::mlir::Type":$varType),
/*methodBody=*/"",
/*defaultImplementation=*/[{
@@ -274,6 +294,14 @@ def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> {
The `initVal` can be empty - it is primarily needed for reductions
to ensure the variable is also initialized with appropriate value.
+ The `needsDestroy` out-parameter is set by implementations to indicate
+ that destruction code must be generated after the returned private
+ variable usages, typically in the destroy region of recipe operations
+ (for example, when heap allocations or temporaries requiring cleanup
+ are created during initialization). When `needsDestroy` is set, callers
+ should invoke `generatePrivateDestroy` in the recipe's destroy region
+ with the privatized value returned by this method.
+
If the return value is empty, it means that recipe body was not
successfully generated.
}],
@@ -284,12 +312,38 @@ def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> {
"::mlir::TypedValue<::mlir::acc::MappableType>":$var,
"::llvm::StringRef":$varName,
"::mlir::ValueRange":$extents,
- "::mlir::Value":$initVal),
+ "::mlir::Value":$initVal,
+ "bool &":$needsDestroy),
/*methodBody=*/"",
/*defaultImplementation=*/[{
return {};
}]
>,
+ InterfaceMethod<
+ /*description=*/[{
+ Generates destruction operations for a privatized value previously
+ produced by `generatePrivateInit`. This is typically inserted in a
+ recipe's destroy region, after all uses of the privatized value.
+
+ The `privatized` value is the SSA value yielded by the init region
+ (and passed as the privatized argument to the destroy region).
+ Implementations should free heap-allocated storage or perform any
+ cleanup required for the given type. If no destruction is required,
+ this function should be a no-op and return `true`.
+
+ Returns true if destruction was successfully generated or deemed not
+ necessary, false otherwise.
+ }],
+ /*retTy=*/"bool",
+ /*methodName=*/"generatePrivateDestroy",
+ /*args=*/(ins "::mlir::OpBuilder &":$builder,
+ "::mlir::Location":$loc,
+ "::mlir::Value":$privatized),
+ /*methodBody=*/"",
+ /*defaultImplementation=*/[{
+ return true;
+ }]
+ >,
];
}
diff --git a/mlir/include/mlir/Dialect/Shard/IR/ShardOps.td b/mlir/include/mlir/Dialect/Shard/IR/ShardOps.td
index 29b384f..b9d7163 100644
--- a/mlir/include/mlir/Dialect/Shard/IR/ShardOps.td
+++ b/mlir/include/mlir/Dialect/Shard/IR/ShardOps.td
@@ -174,7 +174,7 @@ def Shard_NeighborsLinearIndicesOp : Shard_Op<"neighbors_linear_indices", [
```
The above returns two indices, `633` and `693`, which correspond to the
index of the previous process `(1, 1, 3)`, and the next process
- `(1, 3, 3) along the split axis `1`.
+ `(1, 3, 3)` along the split axis `1`.
A negative value is returned if there is no neighbor in the respective
direction along the given `split_axes`.
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index 6e79085..6e15b1e 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -2999,6 +2999,7 @@ def Vector_StepOp : Vector_Op<"step", [
}];
let results = (outs VectorOfRankAndType<[1], [Index]>:$result);
let assemblyFormat = "attr-dict `:` type($result)";
+ let hasCanonicalizer = 1;
}
def Vector_YieldOp : Vector_Op<"yield", [
diff --git a/mlir/include/mlir/IR/Remarks.h b/mlir/include/mlir/IR/Remarks.h
index 20e84ec..9877926 100644
--- a/mlir/include/mlir/IR/Remarks.h
+++ b/mlir/include/mlir/IR/Remarks.h
@@ -18,7 +18,6 @@
#include "llvm/Remarks/Remark.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Regex.h"
-#include <optional>
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/MLIRContext.h"
@@ -144,7 +143,7 @@ public:
llvm::StringRef getCategoryName() const { return categoryName; }
- llvm::StringRef getFullCategoryName() const {
+ llvm::StringRef getCombinedCategoryName() const {
if (categoryName.empty() && subCategoryName.empty())
return {};
if (subCategoryName.empty())
@@ -318,7 +317,7 @@ private:
};
//===----------------------------------------------------------------------===//
-// MLIR Remark Streamer
+// Pluggable Remark Utilities
//===----------------------------------------------------------------------===//
/// Base class for MLIR remark streamers that is used to stream
@@ -338,6 +337,26 @@ public:
virtual void finalize() {} // optional
};
+using ReportFn = llvm::unique_function<void(const Remark &)>;
+
+/// Base class for MLIR remark emitting policies that is used to emit
+/// optimization remarks to the underlying remark streamer. The derived classes
+/// should implement the `reportRemark` method to provide the actual emitting
+/// implementation.
+class RemarkEmittingPolicyBase {
+protected:
+ ReportFn reportImpl;
+
+public:
+ RemarkEmittingPolicyBase() = default;
+ virtual ~RemarkEmittingPolicyBase() = default;
+
+ void initialize(ReportFn fn) { reportImpl = std::move(fn); }
+
+ virtual void reportRemark(const Remark &remark) = 0;
+ virtual void finalize() = 0;
+};
+
//===----------------------------------------------------------------------===//
// Remark Engine (MLIR Context will own this class)
//===----------------------------------------------------------------------===//
@@ -355,6 +374,8 @@ private:
std::optional<llvm::Regex> failedFilter;
/// The MLIR remark streamer that will be used to emit the remarks.
std::unique_ptr<MLIRRemarkStreamerBase> remarkStreamer;
+ /// The MLIR remark policy that will be used to emit the remarks.
+ std::unique_ptr<RemarkEmittingPolicyBase> remarkEmittingPolicy;
/// When is enabled, engine also prints remarks as mlir::emitRemarks.
bool printAsEmitRemarks = false;
@@ -392,6 +413,8 @@ private:
InFlightRemark emitIfEnabled(Location loc, RemarkOpts opts,
bool (RemarkEngine::*isEnabled)(StringRef)
const);
+ /// Report a remark.
+ void reportImpl(const Remark &remark);
public:
/// Default constructor is deleted, use the other constructor.
@@ -407,8 +430,15 @@ public:
~RemarkEngine();
/// Setup the remark engine with the given output path and format.
- LogicalResult initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer,
- std::string *errMsg);
+ LogicalResult
+ initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer,
+ std::unique_ptr<RemarkEmittingPolicyBase> remarkEmittingPolicy,
+ std::string *errMsg);
+
+ /// Get the remark emitting policy.
+ RemarkEmittingPolicyBase *getRemarkEmittingPolicy() const {
+ return remarkEmittingPolicy.get();
+ }
/// Report a remark.
void report(const Remark &&remark);
@@ -446,6 +476,46 @@ inline InFlightRemark withEngine(Fn fn, Location loc, Args &&...args) {
namespace mlir::remark {
+//===----------------------------------------------------------------------===//
+// Remark Emitting Policies
+//===----------------------------------------------------------------------===//
+
+/// Policy that emits all remarks.
+class RemarkEmittingPolicyAll : public detail::RemarkEmittingPolicyBase {
+public:
+ RemarkEmittingPolicyAll();
+
+ void reportRemark(const detail::Remark &remark) override {
+ assert(reportImpl && "reportImpl is not set");
+ reportImpl(remark);
+ }
+ void finalize() override {}
+};
+
+/// Policy that emits final remarks.
+class RemarkEmittingPolicyFinal : public detail::RemarkEmittingPolicyBase {
+private:
+ /// user can intercept them for custom processing via a registered callback,
+ /// otherwise they will be reported on engine destruction.
+ llvm::DenseSet<detail::Remark> postponedRemarks;
+
+public:
+ RemarkEmittingPolicyFinal();
+
+ void reportRemark(const detail::Remark &remark) override {
+ postponedRemarks.erase(remark);
+ postponedRemarks.insert(remark);
+ }
+
+ void finalize() override {
+ assert(reportImpl && "reportImpl is not set");
+ for (auto &remark : postponedRemarks) {
+ if (reportImpl)
+ reportImpl(remark);
+ }
+ }
+};
+
/// Create a Reason with llvm::formatv formatting.
template <class... Ts>
inline detail::LazyTextBuild reason(const char *fmt, Ts &&...ts) {
@@ -505,16 +575,72 @@ inline detail::InFlightRemark analysis(Location loc, RemarkOpts opts) {
/// Setup remarks for the context. This function will enable the remark engine
/// and set the streamer to be used for optimization remarks. The remark
-/// categories are used to filter the remarks that will be emitted by the remark
-/// engine. If a category is not specified, it will not be emitted. If
+/// categories are used to filter the remarks that will be emitted by the
+/// remark engine. If a category is not specified, it will not be emitted. If
/// `printAsEmitRemarks` is true, the remarks will be printed as
/// mlir::emitRemarks. 'streamer' must inherit from MLIRRemarkStreamerBase and
/// will be used to stream the remarks.
LogicalResult enableOptimizationRemarks(
MLIRContext &ctx,
std::unique_ptr<remark::detail::MLIRRemarkStreamerBase> streamer,
+ std::unique_ptr<remark::detail::RemarkEmittingPolicyBase>
+ remarkEmittingPolicy,
const remark::RemarkCategories &cats, bool printAsEmitRemarks = false);
} // namespace mlir::remark
+// DenseMapInfo specialization for Remark
+namespace llvm {
+template <>
+struct DenseMapInfo<mlir::remark::detail::Remark> {
+ static constexpr StringRef kEmptyKey = "<EMPTY_KEY>";
+ static constexpr StringRef kTombstoneKey = "<TOMBSTONE_KEY>";
+
+ /// Helper to provide a static dummy context for sentinel keys.
+ static mlir::MLIRContext *getStaticDummyContext() {
+ static mlir::MLIRContext dummyContext;
+ return &dummyContext;
+ }
+
+ /// Create an empty remark
+ static inline mlir::remark::detail::Remark getEmptyKey() {
+ return mlir::remark::detail::Remark(
+ mlir::remark::RemarkKind::RemarkUnknown, mlir::DiagnosticSeverity::Note,
+ mlir::UnknownLoc::get(getStaticDummyContext()),
+ mlir::remark::RemarkOpts::name(kEmptyKey));
+ }
+
+ /// Create a dead remark
+ static inline mlir::remark::detail::Remark getTombstoneKey() {
+ return mlir::remark::detail::Remark(
+ mlir::remark::RemarkKind::RemarkUnknown, mlir::DiagnosticSeverity::Note,
+ mlir::UnknownLoc::get(getStaticDummyContext()),
+ mlir::remark::RemarkOpts::name(kTombstoneKey));
+ }
+
+ /// Compute the hash value of the remark
+ static unsigned getHashValue(const mlir::remark::detail::Remark &remark) {
+ return llvm::hash_combine(
+ remark.getLocation().getAsOpaquePointer(),
+ llvm::hash_value(remark.getRemarkName()),
+ llvm::hash_value(remark.getCombinedCategoryName()));
+ }
+
+ static bool isEqual(const mlir::remark::detail::Remark &lhs,
+ const mlir::remark::detail::Remark &rhs) {
+ // Check for empty/tombstone keys first
+ if (lhs.getRemarkName() == kEmptyKey ||
+ lhs.getRemarkName() == kTombstoneKey ||
+ rhs.getRemarkName() == kEmptyKey ||
+ rhs.getRemarkName() == kTombstoneKey) {
+ return lhs.getRemarkName() == rhs.getRemarkName();
+ }
+
+ // For regular remarks, compare key identifying fields
+ return lhs.getLocation() == rhs.getLocation() &&
+ lhs.getRemarkName() == rhs.getRemarkName() &&
+ lhs.getCombinedCategoryName() == rhs.getCombinedCategoryName();
+ }
+};
+} // namespace llvm
#endif // MLIR_IR_REMARKS_H
diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt
index a5feb59..72ed046 100644
--- a/mlir/include/mlir/Interfaces/CMakeLists.txt
+++ b/mlir/include/mlir/Interfaces/CMakeLists.txt
@@ -6,6 +6,7 @@ add_mlir_interface(DestinationStyleOpInterface)
add_mlir_interface(FunctionInterfaces)
add_mlir_interface(IndexingMapOpInterface)
add_mlir_interface(InferIntRangeInterface)
+add_mlir_interface(InferStridedMetadataInterface)
add_mlir_interface(InferTypeOpInterface)
add_mlir_interface(LoopLikeInterface)
add_mlir_interface(MemOpInterfaces)
diff --git a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h
index 0e107e8..a6de3d1 100644
--- a/mlir/include/mlir/Interfaces/InferIntRangeInterface.h
+++ b/mlir/include/mlir/Interfaces/InferIntRangeInterface.h
@@ -117,7 +117,8 @@ public:
IntegerValueRange(ConstantIntRanges value) : value(std::move(value)) {}
/// Create an integer value range lattice value.
- IntegerValueRange(std::optional<ConstantIntRanges> value = std::nullopt)
+ explicit IntegerValueRange(
+ std::optional<ConstantIntRanges> value = std::nullopt)
: value(std::move(value)) {}
/// Whether the range is uninitialized. This happens when the state hasn't
@@ -167,6 +168,15 @@ using SetIntRangeFn =
using SetIntLatticeFn =
llvm::function_ref<void(Value, const IntegerValueRange &)>;
+/// Helper callback type to get the integer range of a value.
+using GetIntRangeFn = function_ref<IntegerValueRange(Value)>;
+
+/// Helper function to collect the integer range values of an array of op fold
+/// results.
+SmallVector<IntegerValueRange> getIntValueRanges(ArrayRef<OpFoldResult> values,
+ GetIntRangeFn getIntRange,
+ int32_t indexBitwidth);
+
class InferIntRangeInterface;
namespace intrange::detail {
diff --git a/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h
new file mode 100644
index 0000000..0c572e0
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.h
@@ -0,0 +1,145 @@
+//===- InferStridedMetadataInterface.h - Strided Metadata Inference -C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions of the strided metadata inference interface
+// defined in `InferStridedMetadataInterface.td`
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE_H
+#define MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE_H
+
+#include "mlir/Interfaces/InferIntRangeInterface.h"
+
+namespace mlir {
+/// A class that represents the strided metadata range information, including
+/// offsets, sizes, and strides as integer ranges.
+class StridedMetadataRange {
+public:
+ /// Default constructor creates uninitialized ranges.
+ StridedMetadataRange() = default;
+
+ /// Returns a ranked strided metadata range.
+ static StridedMetadataRange
+ getRanked(SmallVectorImpl<ConstantIntRanges> &&offsets,
+ SmallVectorImpl<ConstantIntRanges> &&sizes,
+ SmallVectorImpl<ConstantIntRanges> &&strides) {
+ return StridedMetadataRange(std::move(offsets), std::move(sizes),
+ std::move(strides));
+ }
+
+ /// Returns a strided metadata range with maximum ranges.
+ static StridedMetadataRange getMaxRanges(int32_t indexBitwidth,
+ int32_t offsetsRank,
+ int32_t sizeRank,
+ int32_t stridedRank) {
+ return StridedMetadataRange(
+ SmallVector<ConstantIntRanges>(
+ offsetsRank, ConstantIntRanges::maxRange(indexBitwidth)),
+ SmallVector<ConstantIntRanges>(
+ sizeRank, ConstantIntRanges::maxRange(indexBitwidth)),
+ SmallVector<ConstantIntRanges>(
+ stridedRank, ConstantIntRanges::maxRange(indexBitwidth)));
+ }
+
+ static StridedMetadataRange getMaxRanges(int32_t indexBitwidth,
+ int32_t rank) {
+ return getMaxRanges(indexBitwidth, 1, rank, rank);
+ }
+
+ /// Returns whether the metadata is uninitialized.
+ bool isUninitialized() const { return !offsets.has_value(); }
+
+ /// Get the offsets range.
+ ArrayRef<ConstantIntRanges> getOffsets() const {
+ return offsets ? *offsets : ArrayRef<ConstantIntRanges>();
+ }
+ MutableArrayRef<ConstantIntRanges> getOffsets() {
+ return offsets ? *offsets : MutableArrayRef<ConstantIntRanges>();
+ }
+
+ /// Get the sizes ranges.
+ ArrayRef<ConstantIntRanges> getSizes() const { return sizes; }
+ MutableArrayRef<ConstantIntRanges> getSizes() { return sizes; }
+
+ /// Get the strides ranges.
+ ArrayRef<ConstantIntRanges> getStrides() const { return strides; }
+ MutableArrayRef<ConstantIntRanges> getStrides() { return strides; }
+
+ /// Compare two strided metadata ranges.
+ bool operator==(const StridedMetadataRange &other) const {
+ return offsets == other.offsets && sizes == other.sizes &&
+ strides == other.strides;
+ }
+
+ /// Print the strided metadata range.
+ void print(raw_ostream &os) const;
+
+ /// Join two strided metadata ranges, by taking the element-wise union of the
+ /// metadata.
+ static StridedMetadataRange join(const StridedMetadataRange &lhs,
+ const StridedMetadataRange &rhs) {
+ if (lhs.isUninitialized())
+ return rhs;
+ if (rhs.isUninitialized())
+ return lhs;
+
+ // Helper fuction to compute the range union of constant ranges.
+ auto rangeUnion =
+ +[](const std::tuple<ConstantIntRanges, ConstantIntRanges> &lhsRhs)
+ -> ConstantIntRanges {
+ return std::get<0>(lhsRhs).rangeUnion(std::get<1>(lhsRhs));
+ };
+
+ // Get the elementwise range union. Note, that `zip_equal` will assert if
+ // sizes are not equal.
+ SmallVector<ConstantIntRanges> offsets = llvm::map_to_vector(
+ llvm::zip_equal(*lhs.offsets, *rhs.offsets), rangeUnion);
+ SmallVector<ConstantIntRanges> sizes =
+ llvm::map_to_vector(llvm::zip_equal(lhs.sizes, rhs.sizes), rangeUnion);
+ SmallVector<ConstantIntRanges> strides = llvm::map_to_vector(
+ llvm::zip_equal(lhs.strides, rhs.strides), rangeUnion);
+
+ // Return the joined metadata.
+ return StridedMetadataRange(std::move(offsets), std::move(sizes),
+ std::move(strides));
+ }
+
+private:
+ /// Create a strided metadata range with the given offset, sizes, and strides.
+ StridedMetadataRange(SmallVectorImpl<ConstantIntRanges> &&offsets,
+ SmallVectorImpl<ConstantIntRanges> &&sizes,
+ SmallVectorImpl<ConstantIntRanges> &&strides)
+ : offsets(std::move(offsets)), sizes(std::move(sizes)),
+ strides(std::move(strides)) {}
+
+ /// The offsets range.
+ std::optional<SmallVector<ConstantIntRanges>> offsets;
+
+ /// The sizes ranges.
+ SmallVector<ConstantIntRanges> sizes;
+
+ /// The strides ranges.
+ SmallVector<ConstantIntRanges> strides;
+};
+
+/// Print the strided metadata to `os`.
+inline raw_ostream &operator<<(raw_ostream &os,
+ const StridedMetadataRange &range) {
+ range.print(os);
+ return os;
+}
+
+/// Callback function type for setting the strided metadata of a value.
+using SetStridedMetadataRangeFn =
+ function_ref<void(Value, const StridedMetadataRange &)>;
+} // end namespace mlir
+
+#include "mlir/Interfaces/InferStridedMetadataInterface.h.inc"
+
+#endif // MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE_H
diff --git a/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td
new file mode 100644
index 0000000..ee5b094
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/InferStridedMetadataInterface.td
@@ -0,0 +1,45 @@
+//===- InferStridedMetadataInterface.td - Strided MD Inference ----------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the interface for strided metadata range analysis
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE
+#define MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def InferStridedMetadataOpInterface :
+ OpInterface<"InferStridedMetadataOpInterface"> {
+ let description = [{
+ Allows operations to participate in strided metadata analysis by providing
+ methods that allow them to specify bounds on offsets, sizes, and strides
+ of their result(s) given bounds on their input(s) if known.
+ }];
+ let cppNamespace = "::mlir";
+
+ let methods = [
+ InterfaceMethod<[{
+ Infer the strided metadata bounds on the results of this op given
+ the bounds on its operands.
+ For each result value or block argument of interest, the method should
+ call `setMetadata` with that `Value` as an argument.
+ The `operands` parameter contains the strided metadata ranges for all the
+ operands of the operation in order.
+ The `getIntRange` callback is provided for obtaining the int-range
+ analysis result for a given value.
+ }],
+ "void", "inferStridedMetadataRanges",
+ (ins "::llvm::ArrayRef<::mlir::StridedMetadataRange>":$operands,
+ "::mlir::GetIntRangeFn":$getIntRange,
+ "::mlir::SetStridedMetadataRangeFn":$setMetadata,
+ "int32_t":$indexBitwidth)>
+ ];
+}
+#endif // MLIR_INTERFACES_INFERSTRIDEDMETADATAINTERFACE
diff --git a/mlir/include/mlir/Remark/RemarkStreamer.h b/mlir/include/mlir/Remark/RemarkStreamer.h
index 170d6b4..19a70fa 100644
--- a/mlir/include/mlir/Remark/RemarkStreamer.h
+++ b/mlir/include/mlir/Remark/RemarkStreamer.h
@@ -45,6 +45,7 @@ namespace mlir::remark {
/// mlir::emitRemarks.
LogicalResult enableOptimizationRemarksWithLLVMStreamer(
MLIRContext &ctx, StringRef filePath, llvm::remarks::Format fmt,
+ std::unique_ptr<detail::RemarkEmittingPolicyBase> remarkEmittingPolicy,
const RemarkCategories &cat, bool printAsEmitRemarks = false);
} // namespace mlir::remark
diff --git a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
index 0fbe15f..b739438 100644
--- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
+++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h
@@ -44,6 +44,11 @@ enum class RemarkFormat {
REMARK_FORMAT_BITSTREAM,
};
+enum class RemarkPolicy {
+ REMARK_POLICY_ALL,
+ REMARK_POLICY_FINAL,
+};
+
/// Configuration options for the mlir-opt tool.
/// This is intended to help building tools like mlir-opt by collecting the
/// supported options.
@@ -242,6 +247,8 @@ public:
/// Set the reproducer output filename
RemarkFormat getRemarkFormat() const { return remarkFormatFlag; }
+ /// Set the remark policy to use.
+ RemarkPolicy getRemarkPolicy() const { return remarkPolicyFlag; }
/// Set the remark format to use.
std::string getRemarksAllFilter() const { return remarksAllFilterFlag; }
/// Set the remark output file.
@@ -265,6 +272,8 @@ protected:
/// Remark format
RemarkFormat remarkFormatFlag = RemarkFormat::REMARK_FORMAT_STDOUT;
+ /// Remark policy
+ RemarkPolicy remarkPolicyFlag = RemarkPolicy::REMARK_POLICY_ALL;
/// Remark file to output to
std::string remarksOutputFileFlag = "";
/// Remark filters
diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt
index 609cb34..db10ebc 100644
--- a/mlir/lib/Analysis/CMakeLists.txt
+++ b/mlir/lib/Analysis/CMakeLists.txt
@@ -40,6 +40,7 @@ add_mlir_library(MLIRAnalysis
DataFlow/IntegerRangeAnalysis.cpp
DataFlow/LivenessAnalysis.cpp
DataFlow/SparseAnalysis.cpp
+ DataFlow/StridedMetadataRangeAnalysis.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Analysis
@@ -53,6 +54,7 @@ add_mlir_library(MLIRAnalysis
MLIRDataLayoutInterfaces
MLIRFunctionInterfaces
MLIRInferIntRangeInterface
+ MLIRInferStridedMetadataInterface
MLIRInferTypeOpInterface
MLIRLoopLikeInterface
MLIRPresburger
diff --git a/mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp b/mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp
new file mode 100644
index 0000000..01c9daf
--- /dev/null
+++ b/mlir/lib/Analysis/DataFlow/StridedMetadataRangeAnalysis.cpp
@@ -0,0 +1,127 @@
+//===- StridedMetadataRangeAnalysis.cpp - Integer range analysis --------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the dataflow analysis class for integer range inference
+// which is used in transformations over the `arith` dialect such as
+// branch elimination or signed->unsigned rewriting
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h"
+#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Support/DebugStringHelper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugLog.h"
+
+#define DEBUG_TYPE "strided-metadata-range-analysis"
+
+using namespace mlir;
+using namespace mlir::dataflow;
+
+/// Get the entry state for a value. For any value that is not a ranked memref,
+/// this function sets the metadata to a top state with no offsets, sizes, or
+/// strides. For `memref` types, this function will use the metadata in the type
+/// to try to deduce as much informaiton as possible.
+static StridedMetadataRange getEntryStateImpl(Value v, int32_t indexBitwidth) {
+ // TODO: generalize this method with a type interface.
+ auto mTy = dyn_cast<BaseMemRefType>(v.getType());
+
+ // If not a memref or it's un-ranked, don't infer any metadata.
+ if (!mTy || !mTy.hasRank())
+ return StridedMetadataRange::getMaxRanges(indexBitwidth, 0, 0, 0);
+
+ // Get the top state.
+ auto metadata =
+ StridedMetadataRange::getMaxRanges(indexBitwidth, mTy.getRank());
+
+ // Compute the offset and strides.
+ int64_t offset;
+ SmallVector<int64_t> strides;
+ if (failed(cast<MemRefType>(mTy).getStridesAndOffset(strides, offset)))
+ return metadata;
+
+ // Refine the metadata if we know it from the type.
+ if (!ShapedType::isDynamic(offset)) {
+ metadata.getOffsets()[0] =
+ ConstantIntRanges::constant(APInt(indexBitwidth, offset));
+ }
+ for (auto &&[size, range] :
+ llvm::zip_equal(mTy.getShape(), metadata.getSizes())) {
+ if (ShapedType::isDynamic(size))
+ continue;
+ range = ConstantIntRanges::constant(APInt(indexBitwidth, size));
+ }
+ for (auto &&[stride, range] :
+ llvm::zip_equal(strides, metadata.getStrides())) {
+ if (ShapedType::isDynamic(stride))
+ continue;
+ range = ConstantIntRanges::constant(APInt(indexBitwidth, stride));
+ }
+
+ return metadata;
+}
+
+StridedMetadataRangeAnalysis::StridedMetadataRangeAnalysis(
+ DataFlowSolver &solver, int32_t indexBitwidth)
+ : SparseForwardDataFlowAnalysis(solver), indexBitwidth(indexBitwidth) {
+ assert(indexBitwidth > 0 && "invalid bitwidth");
+}
+
+void StridedMetadataRangeAnalysis::setToEntryState(
+ StridedMetadataRangeLattice *lattice) {
+ propagateIfChanged(lattice, lattice->join(getEntryStateImpl(
+ lattice->getAnchor(), indexBitwidth)));
+}
+
+LogicalResult StridedMetadataRangeAnalysis::visitOperation(
+ Operation *op, ArrayRef<const StridedMetadataRangeLattice *> operands,
+ ArrayRef<StridedMetadataRangeLattice *> results) {
+ auto inferrable = dyn_cast<InferStridedMetadataOpInterface>(op);
+
+ // Bail if we cannot reason about the op.
+ if (!inferrable) {
+ setAllToEntryStates(results);
+ return success();
+ }
+
+ LDBG() << "Inferring metadata for: "
+ << OpWithFlags(op, OpPrintingFlags().skipRegions());
+
+ // Helper function to retrieve int range values.
+ auto getIntRange = [&](Value value) -> IntegerValueRange {
+ auto lattice = getOrCreateFor<IntegerValueRangeLattice>(
+ getProgramPointAfter(op), value);
+ return lattice ? lattice->getValue() : IntegerValueRange();
+ };
+
+ // Convert the arguments lattices to a vector.
+ SmallVector<StridedMetadataRange> argRanges = llvm::map_to_vector(
+ operands, [](const StridedMetadataRangeLattice *lattice) {
+ return lattice->getValue();
+ });
+
+ // Callback to set metadata on a result.
+ auto joinCallback = [&](Value v, const StridedMetadataRange &md) {
+ auto result = cast<OpResult>(v);
+ assert(llvm::is_contained(op->getResults(), result));
+ LDBG() << "- Inferred metadata: " << md;
+ StridedMetadataRangeLattice *lattice = results[result.getResultNumber()];
+ ChangeResult changed = lattice->join(md);
+ LDBG() << "- Joined metadata: " << lattice->getValue();
+ propagateIfChanged(lattice, changed);
+ };
+
+ // Infer the metadata.
+ inferrable.inferStridedMetadataRanges(argRanges, getIntRange, joinCallback,
+ indexBitwidth);
+ return success();
+}
diff --git a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
index 71687b1..ddcbc44 100644
--- a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
+++ b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
@@ -20,6 +20,7 @@
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
@@ -390,7 +391,8 @@ class LoadStoreToXeVMPattern : public OpConversionPattern<OpType> {
// Load result or Store valye Type can be vector or scalar.
Type valOrResTy;
if constexpr (std::is_same_v<OpType, xegpu::LoadGatherOp>)
- valOrResTy = op.getResult().getType();
+ valOrResTy =
+ this->getTypeConverter()->convertType(op.getResult().getType());
else
valOrResTy = adaptor.getValue().getType();
VectorType valOrResVecTy = dyn_cast<VectorType>(valOrResTy);
@@ -878,10 +880,30 @@ struct ConvertXeGPUToXeVMPass
}
return {};
};
- typeConverter.addSourceMaterialization(memrefMaterializationCast);
- typeConverter.addSourceMaterialization(ui64MaterializationCast);
- typeConverter.addSourceMaterialization(ui32MaterializationCast);
- typeConverter.addSourceMaterialization(vectorMaterializationCast);
+
+ // If result type of original op is single element vector and lowered type
+ // is scalar. This materialization cast creates a single element vector by
+ // broadcasting the scalar value.
+ auto singleElementVectorMaterializationCast =
+ [](OpBuilder &builder, Type type, ValueRange inputs,
+ Location loc) -> Value {
+ if (inputs.size() != 1)
+ return {};
+ auto input = inputs.front();
+ if (input.getType().isIntOrIndexOrFloat()) {
+ // If the input is a scalar, and the target type is a vector of single
+ // element, create a single element vector by broadcasting.
+ if (auto vecTy = dyn_cast<VectorType>(type)) {
+ if (vecTy.getNumElements() == 1) {
+ return vector::BroadcastOp::create(builder, loc, vecTy, input)
+ .getResult();
+ }
+ }
+ }
+ return {};
+ };
+ typeConverter.addSourceMaterialization(
+ singleElementVectorMaterializationCast);
typeConverter.addTargetMaterialization(memrefMaterializationCast);
typeConverter.addTargetMaterialization(ui32MaterializationCast);
typeConverter.addTargetMaterialization(ui64MaterializationCast);
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp
index 624519f..70faa71 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp
@@ -64,12 +64,13 @@ mlir::bufferization::dropEquivalentBufferResults(ModuleOp module) {
module.walk([&](func::CallOp callOp) {
if (func::FuncOp calledFunc =
dyn_cast_or_null<func::FuncOp>(callOp.resolveCallable())) {
- callerMap[calledFunc].insert(callOp);
+ if (!calledFunc.isPublic() && !calledFunc.isExternal())
+ callerMap[calledFunc].insert(callOp);
}
});
for (auto funcOp : module.getOps<func::FuncOp>()) {
- if (funcOp.isExternal())
+ if (funcOp.isExternal() || funcOp.isPublic())
continue;
func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
// TODO: Support functions with multiple blocks.
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 5edcc40b..55fe0d9 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -309,6 +309,17 @@ LogicalResult ConvertBF16x2ToF8x2Op::verify() {
return success();
}
+LogicalResult ConvertF32x2ToF4x2Op::verify() {
+ mlir::MLIRContext *ctx = getContext();
+
+ if (!llvm::isa<mlir::Float4E2M1FNType>(getDstTy()))
+ return emitOpError("Only ")
+ << mlir::Float4E2M1FNType::get(ctx)
+ << " type is supported for conversions from f32x2 to f4x2.";
+
+ return success();
+}
+
LogicalResult BulkStoreOp::verify() {
if (getInitVal() != 0)
return emitOpError("only 0 is supported for initVal, got ") << getInitVal();
@@ -2047,6 +2058,23 @@ ConvertFloatToTF32Op::getIntrinsicID(NVVM::FPRoundingMode rnd,
}
}
+NVVM::IDArgPair
+ConvertF32x2ToF4x2Op::getIntrinsicIDAndArgs(NVVM::ConvertF32x2ToF4x2Op op,
+ LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase &builder) {
+ llvm::SmallVector<llvm::Value *> args;
+ args.push_back(mt.lookupValue(op.getA()));
+ args.push_back(mt.lookupValue(op.getB()));
+
+ bool hasRelu = op.getRelu();
+
+ llvm::Intrinsic::ID intId =
+ hasRelu ? llvm::Intrinsic::nvvm_ff_to_e2m1x2_rn_relu_satfinite
+ : llvm::Intrinsic::nvvm_ff_to_e2m1x2_rn_satfinite;
+
+ return {intId, std::move(args)};
+}
+
#define GET_F32x2_TO_F6x2_ID(type, has_relu) \
has_relu ? llvm::Intrinsic::nvvm_ff_to_##type##_rn_relu_satfinite \
: llvm::Intrinsic::nvvm_ff_to_##type##_rn_satfinite
@@ -2306,6 +2334,32 @@ static void nvvmInferResultRanges(Operation *op, Value result,
}
}
+/// Verify the range attribute satisfies LLVM ConstantRange constructor
+/// requirements for NVVM SpecialRangeableRegisterOp.
+static LogicalResult
+verifyConstantRangeAttr(Operation *op,
+ std::optional<LLVM::ConstantRangeAttr> rangeAttr) {
+ if (!rangeAttr)
+ return success();
+
+ const llvm::APInt &lower = rangeAttr->getLower();
+ const llvm::APInt &upper = rangeAttr->getUpper();
+
+ // Check LLVM ConstantRange constructor condition
+ if (lower == upper && !lower.isMaxValue() && !lower.isMinValue()) {
+ unsigned bitWidth = lower.getBitWidth();
+ llvm::APInt minVal = llvm::APInt::getMinValue(bitWidth);
+ llvm::APInt maxVal = llvm::APInt::getMaxValue(bitWidth);
+ return op->emitOpError(
+ "invalid range attribute: Lower == Upper, but they aren't min (")
+ << llvm::toString(minVal, 10, false) << ") or max ("
+ << llvm::toString(maxVal, 10, false)
+ << ") value! This is an invalid constant range.";
+ }
+
+ return success();
+}
+
static llvm::Value *getAsPackedI32(llvm::Value *arg,
llvm::IRBuilderBase &builder) {
return builder.CreateBitCast(arg,
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
index c477c6c..dcc1ef9 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
@@ -315,7 +315,8 @@ bool mlir::linalg::detail::isContractionBody(
Value yielded = getSourceSkipUnary(terminator->getOperand(0));
Operation *reductionOp = yielded.getDefiningOp();
- if (reductionOp->getNumResults() != 1 || reductionOp->getNumOperands() != 2) {
+ if (!reductionOp || reductionOp->getNumResults() != 1 ||
+ reductionOp->getNumOperands() != 2) {
errs << "expected reduction op to be binary";
return false;
}
diff --git a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
index e25a012..1382c7ac 100644
--- a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
@@ -5,7 +5,7 @@ add_mlir_dialect_library(MLIRMemRefDialect
ValueBoundsOpInterfaceImpl.cpp
ADDITIONAL_HEADER_DIRS
- ${PROJECT_SOURCE_DIR}/inlude/mlir/Dialect/MemRefDialect
+ ${PROJECT_SOURCE_DIR}/inlude/mlir/Dialect/MemRef/IR
DEPENDS
MLIRMemRefOpsIncGen
@@ -18,6 +18,7 @@ add_mlir_dialect_library(MLIRMemRefDialect
MLIRDialectUtils
MLIRInferIntRangeCommon
MLIRInferIntRangeInterface
+ MLIRInferStridedMetadataInterface
MLIRInferTypeOpInterface
MLIRIR
MLIRMemOpInterfaces
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index e9bdcda..507597b 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -3437,6 +3437,65 @@ SubViewOp::bubbleDownCasts(OpBuilder &builder) {
return bubbleDownCastsPassthroughOpImpl(*this, builder, getSourceMutable());
}
+void SubViewOp::inferStridedMetadataRanges(
+ ArrayRef<StridedMetadataRange> ranges, GetIntRangeFn getIntRange,
+ SetStridedMetadataRangeFn setMetadata, int32_t indexBitwidth) {
+ auto isUninitialized =
+ +[](IntegerValueRange range) { return range.isUninitialized(); };
+
+ // Bail early if any of the operands metadata is not ready:
+ SmallVector<IntegerValueRange> offsetOperands =
+ getIntValueRanges(getMixedOffsets(), getIntRange, indexBitwidth);
+ if (llvm::any_of(offsetOperands, isUninitialized))
+ return;
+
+ SmallVector<IntegerValueRange> sizeOperands =
+ getIntValueRanges(getMixedSizes(), getIntRange, indexBitwidth);
+ if (llvm::any_of(sizeOperands, isUninitialized))
+ return;
+
+ SmallVector<IntegerValueRange> stridesOperands =
+ getIntValueRanges(getMixedStrides(), getIntRange, indexBitwidth);
+ if (llvm::any_of(stridesOperands, isUninitialized))
+ return;
+
+ StridedMetadataRange sourceRange =
+ ranges[getSourceMutable().getOperandNumber()];
+ if (sourceRange.isUninitialized())
+ return;
+
+ ArrayRef<ConstantIntRanges> srcStrides = sourceRange.getStrides();
+
+ // Get the dropped dims.
+ llvm::SmallBitVector droppedDims = getDroppedDims();
+
+ // Compute the new offset, strides and sizes.
+ ConstantIntRanges offset = sourceRange.getOffsets()[0];
+ SmallVector<ConstantIntRanges> strides, sizes;
+
+ for (size_t i = 0, e = droppedDims.size(); i < e; ++i) {
+ bool dropped = droppedDims.test(i);
+ // Compute the new offset.
+ ConstantIntRanges off =
+ intrange::inferMul({offsetOperands[i].getValue(), srcStrides[i]});
+ offset = intrange::inferAdd({offset, off});
+
+ // Skip dropped dimensions.
+ if (dropped)
+ continue;
+ // Multiply the strides.
+ strides.push_back(
+ intrange::inferMul({stridesOperands[i].getValue(), srcStrides[i]}));
+ // Get the sizes.
+ sizes.push_back(sizeOperands[i].getValue());
+ }
+
+ setMetadata(getResult(),
+ StridedMetadataRange::getRanked(
+ SmallVector<ConstantIntRanges>({std::move(offset)}),
+ std::move(sizes), std::move(strides)));
+}
+
//===----------------------------------------------------------------------===//
// TransposeOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 6564a4e..642ced9 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -17,6 +17,7 @@
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/OpImplementation.h"
+#include "mlir/IR/SymbolTable.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallSet.h"
@@ -74,14 +75,16 @@ struct MemRefPointerLikeModel
}
mlir::Value genAllocate(Type pointer, OpBuilder &builder, Location loc,
- StringRef varName, Type varType,
- Value originalVar) const {
+ StringRef varName, Type varType, Value originalVar,
+ bool &needsFree) const {
auto memrefTy = cast<MemRefType>(pointer);
// Check if this is a static memref (all dimensions are known) - if yes
// then we can generate an alloca operation.
- if (memrefTy.hasStaticShape())
+ if (memrefTy.hasStaticShape()) {
+ needsFree = false; // alloca doesn't need deallocation
return memref::AllocaOp::create(builder, loc, memrefTy).getResult();
+ }
// For dynamic memrefs, extract sizes from the original variable if
// provided. Otherwise they cannot be handled.
@@ -99,6 +102,7 @@ struct MemRefPointerLikeModel
// Note: We only add dynamic sizes to the dynamicSizes array
// Static dimensions are handled automatically by AllocOp
}
+ needsFree = true; // alloc needs deallocation
return memref::AllocOp::create(builder, loc, memrefTy, dynamicSizes)
.getResult();
}
@@ -108,10 +112,14 @@ struct MemRefPointerLikeModel
}
bool genFree(Type pointer, OpBuilder &builder, Location loc,
- TypedValue<PointerLikeType> varPtr, Type varType) const {
- if (auto memrefValue = dyn_cast<TypedValue<MemRefType>>(varPtr)) {
+ TypedValue<PointerLikeType> varToFree, Value allocRes,
+ Type varType) const {
+ if (auto memrefValue = dyn_cast<TypedValue<MemRefType>>(varToFree)) {
+ // Use allocRes if provided to determine the allocation type
+ Value valueToInspect = allocRes ? allocRes : memrefValue;
+
// Walk through casts to find the original allocation
- Value currentValue = memrefValue;
+ Value currentValue = valueToInspect;
Operation *originalAlloc = nullptr;
// Follow the chain of operations to find the original allocation
@@ -150,7 +158,7 @@ struct MemRefPointerLikeModel
return true;
}
if (isa<memref::AllocOp>(originalAlloc)) {
- // This is an alloc - generate dealloc
+ // This is an alloc - generate dealloc on varToFree
memref::DeallocOp::create(builder, loc, memrefValue);
return true;
}
@@ -1003,6 +1011,142 @@ struct RemoveConstantIfConditionWithRegion : public OpRewritePattern<OpTy> {
}
};
+//===----------------------------------------------------------------------===//
+// Recipe Region Helpers
+//===----------------------------------------------------------------------===//
+
+/// Create and populate an init region for privatization recipes.
+/// Returns the init block on success, or nullptr on failure.
+/// Sets needsFree to indicate if the allocated memory requires deallocation.
+static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc,
+ Type varType, StringRef varName,
+ ValueRange bounds,
+ bool &needsFree) {
+ // Create init block with arguments: original value + bounds
+ SmallVector<Type> argTypes{varType};
+ SmallVector<Location> argLocs{loc};
+ for (Value bound : bounds) {
+ argTypes.push_back(bound.getType());
+ argLocs.push_back(loc);
+ }
+
+ auto initBlock = std::make_unique<Block>();
+ initBlock->addArguments(argTypes, argLocs);
+ builder.setInsertionPointToStart(initBlock.get());
+
+ Value privatizedValue;
+
+ // Get the block argument that represents the original variable
+ Value blockArgVar = initBlock->getArgument(0);
+
+ // Generate init region body based on variable type
+ if (isa<MappableType>(varType)) {
+ auto mappableTy = cast<MappableType>(varType);
+ auto typedVar = cast<TypedValue<MappableType>>(blockArgVar);
+ privatizedValue = mappableTy.generatePrivateInit(
+ builder, loc, typedVar, varName, bounds, {}, needsFree);
+ if (!privatizedValue)
+ return nullptr;
+ } else {
+ assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
+ auto pointerLikeTy = cast<PointerLikeType>(varType);
+ // Use PointerLikeType's allocation API with the block argument
+ privatizedValue = pointerLikeTy.genAllocate(builder, loc, varName, varType,
+ blockArgVar, needsFree);
+ if (!privatizedValue)
+ return nullptr;
+ }
+
+ // Add yield operation to init block
+ acc::YieldOp::create(builder, loc, privatizedValue);
+
+ return initBlock;
+}
+
+/// Create and populate a copy region for firstprivate recipes.
+/// Returns the copy block on success, or nullptr on failure.
+/// TODO: Handle MappableType - it does not yet have a copy API.
+static std::unique_ptr<Block> createCopyRegion(OpBuilder &builder, Location loc,
+ Type varType,
+ ValueRange bounds) {
+ // Create copy block with arguments: original value + privatized value +
+ // bounds
+ SmallVector<Type> copyArgTypes{varType, varType};
+ SmallVector<Location> copyArgLocs{loc, loc};
+ for (Value bound : bounds) {
+ copyArgTypes.push_back(bound.getType());
+ copyArgLocs.push_back(loc);
+ }
+
+ auto copyBlock = std::make_unique<Block>();
+ copyBlock->addArguments(copyArgTypes, copyArgLocs);
+ builder.setInsertionPointToStart(copyBlock.get());
+
+ bool isMappable = isa<MappableType>(varType);
+ bool isPointerLike = isa<PointerLikeType>(varType);
+ // TODO: Handle MappableType - it does not yet have a copy API.
+ // Otherwise, for now just fallback to pointer-like behavior.
+ if (isMappable && !isPointerLike)
+ return nullptr;
+
+ // Generate copy region body based on variable type
+ if (isPointerLike) {
+ auto pointerLikeTy = cast<PointerLikeType>(varType);
+ Value originalArg = copyBlock->getArgument(0);
+ Value privatizedArg = copyBlock->getArgument(1);
+
+ // Generate copy operation using PointerLikeType interface
+ if (!pointerLikeTy.genCopy(
+ builder, loc, cast<TypedValue<PointerLikeType>>(privatizedArg),
+ cast<TypedValue<PointerLikeType>>(originalArg), varType))
+ return nullptr;
+ }
+
+ // Add terminator to copy block
+ acc::TerminatorOp::create(builder, loc);
+
+ return copyBlock;
+}
+
+/// Create and populate a destroy region for privatization recipes.
+/// Returns the destroy block on success, or nullptr if not needed.
+static std::unique_ptr<Block> createDestroyRegion(OpBuilder &builder,
+ Location loc, Type varType,
+ Value allocRes,
+ ValueRange bounds) {
+ // Create destroy block with arguments: original value + privatized value +
+ // bounds
+ SmallVector<Type> destroyArgTypes{varType, varType};
+ SmallVector<Location> destroyArgLocs{loc, loc};
+ for (Value bound : bounds) {
+ destroyArgTypes.push_back(bound.getType());
+ destroyArgLocs.push_back(loc);
+ }
+
+ auto destroyBlock = std::make_unique<Block>();
+ destroyBlock->addArguments(destroyArgTypes, destroyArgLocs);
+ builder.setInsertionPointToStart(destroyBlock.get());
+
+ bool isMappable = isa<MappableType>(varType);
+ bool isPointerLike = isa<PointerLikeType>(varType);
+ // TODO: Handle MappableType - it does not yet have a deallocation API.
+ // Otherwise, for now just fallback to pointer-like behavior.
+ if (isMappable && !isPointerLike)
+ return nullptr;
+
+ assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
+ auto pointerLikeTy = cast<PointerLikeType>(varType);
+ auto privatizedArg =
+ cast<TypedValue<PointerLikeType>>(destroyBlock->getArgument(1));
+ // Pass allocRes to help determine the allocation type
+ if (!pointerLikeTy.genFree(builder, loc, privatizedArg, allocRes, varType))
+ return nullptr;
+
+ acc::TerminatorOp::create(builder, loc);
+
+ return destroyBlock;
+}
+
} // namespace
//===----------------------------------------------------------------------===//
@@ -1050,6 +1194,55 @@ LogicalResult acc::PrivateRecipeOp::verifyRegions() {
return success();
}
+std::optional<PrivateRecipeOp>
+PrivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
+ StringRef recipeName, Type varType,
+ StringRef varName, ValueRange bounds) {
+ // First, validate that we can handle this variable type
+ bool isMappable = isa<MappableType>(varType);
+ bool isPointerLike = isa<PointerLikeType>(varType);
+
+ // Unsupported type
+ if (!isMappable && !isPointerLike)
+ return std::nullopt;
+
+ // Create init and destroy blocks using shared helpers
+ OpBuilder::InsertionGuard guard(builder);
+
+ // Save the original insertion point for creating the recipe operation later
+ auto originalInsertionPoint = builder.saveInsertionPoint();
+
+ bool needsFree = false;
+ auto initBlock =
+ createInitRegion(builder, loc, varType, varName, bounds, needsFree);
+ if (!initBlock)
+ return std::nullopt;
+
+ // Only create destroy region if the allocation needs deallocation
+ std::unique_ptr<Block> destroyBlock;
+ if (needsFree) {
+ // Extract the allocated value from the init block's yield operation
+ auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator());
+ Value allocRes = yieldOp.getOperand(0);
+
+ destroyBlock = createDestroyRegion(builder, loc, varType, allocRes, bounds);
+ if (!destroyBlock)
+ return std::nullopt;
+ }
+
+ // Now create the recipe operation at the original insertion point and attach
+ // the blocks
+ builder.restoreInsertionPoint(originalInsertionPoint);
+ auto recipe = PrivateRecipeOp::create(builder, loc, recipeName, varType);
+
+ // Move the blocks into the recipe's regions
+ recipe.getInitRegion().push_back(initBlock.release());
+ if (destroyBlock)
+ recipe.getDestroyRegion().push_back(destroyBlock.release());
+
+ return recipe;
+}
+
//===----------------------------------------------------------------------===//
// FirstprivateRecipeOp
//===----------------------------------------------------------------------===//
@@ -1080,6 +1273,60 @@ LogicalResult acc::FirstprivateRecipeOp::verifyRegions() {
return success();
}
+std::optional<FirstprivateRecipeOp>
+FirstprivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
+ StringRef recipeName, Type varType,
+ StringRef varName, ValueRange bounds) {
+ // First, validate that we can handle this variable type
+ bool isMappable = isa<MappableType>(varType);
+ bool isPointerLike = isa<PointerLikeType>(varType);
+
+ // Unsupported type
+ if (!isMappable && !isPointerLike)
+ return std::nullopt;
+
+ // Create init, copy, and destroy blocks using shared helpers
+ OpBuilder::InsertionGuard guard(builder);
+
+ // Save the original insertion point for creating the recipe operation later
+ auto originalInsertionPoint = builder.saveInsertionPoint();
+
+ bool needsFree = false;
+ auto initBlock =
+ createInitRegion(builder, loc, varType, varName, bounds, needsFree);
+ if (!initBlock)
+ return std::nullopt;
+
+ auto copyBlock = createCopyRegion(builder, loc, varType, bounds);
+ if (!copyBlock)
+ return std::nullopt;
+
+ // Only create destroy region if the allocation needs deallocation
+ std::unique_ptr<Block> destroyBlock;
+ if (needsFree) {
+ // Extract the allocated value from the init block's yield operation
+ auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator());
+ Value allocRes = yieldOp.getOperand(0);
+
+ destroyBlock = createDestroyRegion(builder, loc, varType, allocRes, bounds);
+ if (!destroyBlock)
+ return std::nullopt;
+ }
+
+ // Now create the recipe operation at the original insertion point and attach
+ // the blocks
+ builder.restoreInsertionPoint(originalInsertionPoint);
+ auto recipe = FirstprivateRecipeOp::create(builder, loc, recipeName, varType);
+
+ // Move the blocks into the recipe's regions
+ recipe.getInitRegion().push_back(initBlock.release());
+ recipe.getCopyRegion().push_back(copyBlock.release());
+ if (destroyBlock)
+ recipe.getDestroyRegion().push_back(destroyBlock.release());
+
+ return recipe;
+}
+
//===----------------------------------------------------------------------===//
// ReductionRecipeOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 58256b0..45c54c7 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -7601,6 +7601,111 @@ void StepOp::inferResultRanges(ArrayRef<ConstantIntRanges> argRanges,
setResultRanges(getResult(), result);
}
+namespace {
+
+/// Fold `vector.step -> arith.cmpi` when the step value is compared to a
+/// constant large enough such that the result is the same at all indices.
+///
+/// For example, rewrite the 'greater than' comparison below,
+///
+/// ```mlir
+/// %cst = arith.constant dense<7> : vector<3xindex>
+/// %stp = vector.step : vector<3xindex>
+/// %out = arith.cmpi ugt, %stp, %cst : vector<3xindex>
+/// ```
+///
+/// as,
+///
+/// ```mlir
+/// %out = arith.constant dense<false> : vector<3xi1>.
+/// ```
+///
+/// Above `[0, 1, 2] > [7, 7, 7]` => `[false, false, false]`. Because the result
+/// is false at ALL indices we fold. If the constant was 1, then
+/// `[0, 1, 2] > [1, 1, 1]` => `[false, false, true]` and we do fold,
+/// conservatively preferring the 'compact' vector.step representation.
+///
+/// Note: this folder only works for the case where the constant (`%cst` above)
+/// is the second operand of the comparison. The arith.cmpi canonicalizer will
+/// ensure that constants are always second (on the right).
+struct StepCompareFolder : public OpRewritePattern<StepOp> {
+ using Base::Base;
+
+ LogicalResult matchAndRewrite(StepOp stepOp,
+ PatternRewriter &rewriter) const override {
+ const int64_t stepSize = stepOp.getResult().getType().getNumElements();
+
+ for (OpOperand &use : stepOp.getResult().getUses()) {
+ auto cmpiOp = dyn_cast<arith::CmpIOp>(use.getOwner());
+ if (!cmpiOp)
+ continue;
+
+ // arith.cmpi canonicalizer makes constants final operands.
+ const unsigned stepOperandNumber = use.getOperandNumber();
+ if (stepOperandNumber != 0)
+ continue;
+
+ // Check that operand 1 is a constant.
+ unsigned constOperandNumber = 1;
+ Value otherOperand = cmpiOp.getOperand(constOperandNumber);
+ std::optional<int64_t> maybeConstValue =
+ getConstantIntValue(otherOperand);
+ if (!maybeConstValue.has_value())
+ continue;
+
+ int64_t constValue = maybeConstValue.value();
+ arith::CmpIPredicate pred = cmpiOp.getPredicate();
+
+ auto maybeSplat = [&]() -> std::optional<bool> {
+ // Handle ult (unsigned less than) and uge (unsigned greater equal).
+ if ((pred == arith::CmpIPredicate::ult ||
+ pred == arith::CmpIPredicate::uge) &&
+ stepSize <= constValue)
+ return pred == arith::CmpIPredicate::ult;
+
+ // Handle ule and ugt.
+ if ((pred == arith::CmpIPredicate::ule ||
+ pred == arith::CmpIPredicate::ugt) &&
+ stepSize - 1 <= constValue) {
+ return pred == arith::CmpIPredicate::ule;
+ }
+
+ // Handle eq and ne.
+ if ((pred == arith::CmpIPredicate::eq ||
+ pred == arith::CmpIPredicate::ne) &&
+ stepSize <= constValue)
+ return pred == arith::CmpIPredicate::ne;
+
+ return std::nullopt;
+ }();
+
+ if (!maybeSplat.has_value())
+ continue;
+
+ rewriter.setInsertionPointAfter(cmpiOp);
+
+ auto type = dyn_cast<VectorType>(cmpiOp.getResult().getType());
+ if (!type)
+ continue;
+
+ auto boolAttr = DenseElementsAttr::get(type, maybeSplat.value());
+ Value splat = mlir::arith::ConstantOp::create(rewriter, cmpiOp.getLoc(),
+ type, boolAttr);
+
+ rewriter.replaceOp(cmpiOp, splat);
+ return success();
+ }
+
+ return failure();
+ }
+};
+} // namespace
+
+void StepOp::getCanonicalizationPatterns(RewritePatternSet &results,
+ MLIRContext *context) {
+ results.add<StepCompareFolder>(context);
+}
+
//===----------------------------------------------------------------------===//
// Vector Masking Utilities
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
index e95338f..12e6475 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
@@ -928,17 +928,20 @@ struct WarpOpDeadResult : public WarpDistributionPattern {
// Some values may be yielded multiple times and correspond to multiple
// results. Deduplicating occurs by taking each result with its matching
// yielded value, and:
- // 1. recording the unique first position at which the value is yielded.
+ // 1. recording the unique first position at which the value with uses is
+ // yielded.
// 2. recording for the result, the first position at which the dedup'ed
// value is yielded.
// 3. skipping from the new result types / new yielded values any result
// that has no use or whose yielded value has already been seen.
for (OpResult result : warpOp.getResults()) {
+ if (result.use_empty())
+ continue;
Value yieldOperand = yield.getOperand(result.getResultNumber());
auto it = dedupYieldOperandPositionMap.insert(
std::make_pair(yieldOperand, newResultTypes.size()));
dedupResultPositionMap.insert(std::make_pair(result, it.first->second));
- if (result.use_empty() || !it.second)
+ if (!it.second)
continue;
newResultTypes.push_back(result.getType());
newYieldValues.push_back(yieldOperand);
@@ -1843,16 +1846,16 @@ struct WarpOpScfIfOp : public WarpDistributionPattern {
newWarpOpDistTypes.append(escapingValueDistTypesElse.begin(),
escapingValueDistTypesElse.end());
- llvm::SmallDenseMap<unsigned, unsigned> origToNewYieldIdx;
for (auto [idx, val] :
llvm::zip_equal(nonIfYieldIndices, nonIfYieldValues)) {
- origToNewYieldIdx[idx] = newWarpOpYieldValues.size();
newWarpOpYieldValues.push_back(val);
newWarpOpDistTypes.push_back(warpOp.getResult(idx).getType());
}
- // Create the new `WarpOp` with the updated yield values and types.
- WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns(
- rewriter, warpOp, newWarpOpYieldValues, newWarpOpDistTypes);
+ // Replace the old `WarpOp` with the new one that has additional yield
+ // values and types.
+ SmallVector<size_t> newIndices;
+ WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndAppendReturns(
+ rewriter, warpOp, newWarpOpYieldValues, newWarpOpDistTypes, newIndices);
// `ifOp` returns the result of the inner warp op.
SmallVector<Type> newIfOpDistResTypes;
for (auto [i, res] : llvm::enumerate(ifOp.getResults())) {
@@ -1870,8 +1873,8 @@ struct WarpOpScfIfOp : public WarpDistributionPattern {
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPointAfter(newWarpOp);
auto newIfOp = scf::IfOp::create(
- rewriter, ifOp.getLoc(), newIfOpDistResTypes, newWarpOp.getResult(0),
- static_cast<bool>(ifOp.thenBlock()),
+ rewriter, ifOp.getLoc(), newIfOpDistResTypes,
+ newWarpOp.getResult(newIndices[0]), static_cast<bool>(ifOp.thenBlock()),
static_cast<bool>(ifOp.elseBlock()));
auto encloseRegionInWarpOp =
[&](Block *oldIfBranch, Block *newIfBranch,
@@ -1888,7 +1891,7 @@ struct WarpOpScfIfOp : public WarpDistributionPattern {
for (size_t i = 0; i < escapingValues.size();
++i, ++warpResRangeStart) {
innerWarpInputVals.push_back(
- newWarpOp.getResult(warpResRangeStart));
+ newWarpOp.getResult(newIndices[warpResRangeStart]));
escapeValToBlockArgIndex[escapingValues[i]] =
innerWarpInputTypes.size();
innerWarpInputTypes.push_back(escapingValueInputTypes[i]);
@@ -1936,17 +1939,8 @@ struct WarpOpScfIfOp : public WarpDistributionPattern {
// Update the users of `<- WarpOp.yield <- IfOp.yield` to use the new `IfOp`
// result.
for (auto [origIdx, newIdx] : ifResultMapping)
- rewriter.replaceAllUsesExcept(warpOp.getResult(origIdx),
+ rewriter.replaceAllUsesExcept(newWarpOp.getResult(origIdx),
newIfOp.getResult(newIdx), newIfOp);
- // Similarly, update any users of the `WarpOp` results that were not
- // results of the `IfOp`.
- for (auto [origIdx, newIdx] : origToNewYieldIdx)
- rewriter.replaceAllUsesWith(warpOp.getResult(origIdx),
- newWarpOp.getResult(newIdx));
- // Remove the original `WarpOp` and `IfOp`, they should not have any uses
- // at this point.
- rewriter.eraseOp(ifOp);
- rewriter.eraseOp(warpOp);
return success();
}
@@ -2065,19 +2059,16 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
escapingValueDistTypes.begin(),
escapingValueDistTypes.end());
// Next, we insert all non-`ForOp` yielded values and their distributed
- // types. We also create a mapping between the non-`ForOp` yielded value
- // index and the corresponding new `WarpOp` yield value index (needed to
- // update users later).
- llvm::SmallDenseMap<unsigned, unsigned> nonForResultMapping;
+ // types.
for (auto [i, v] :
llvm::zip_equal(nonForResultIndices, nonForYieldedValues)) {
- nonForResultMapping[i] = newWarpOpYieldValues.size();
newWarpOpYieldValues.push_back(v);
newWarpOpDistTypes.push_back(warpOp.getResult(i).getType());
}
// Create the new `WarpOp` with the updated yield values and types.
- WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns(
- rewriter, warpOp, newWarpOpYieldValues, newWarpOpDistTypes);
+ SmallVector<size_t> newIndices;
+ WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndAppendReturns(
+ rewriter, warpOp, newWarpOpYieldValues, newWarpOpDistTypes, newIndices);
// Next, we create a new `ForOp` with the init args yielded by the new
// `WarpOp`.
@@ -2086,7 +2077,7 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
// escaping values in the new `WarpOp`.
SmallVector<Value> newForOpOperands;
for (size_t i = 0; i < escapingValuesStartIdx; ++i)
- newForOpOperands.push_back(newWarpOp.getResult(i));
+ newForOpOperands.push_back(newWarpOp.getResult(newIndices[i]));
// Create a new `ForOp` outside the new `WarpOp` region.
OpBuilder::InsertionGuard g(rewriter);
@@ -2110,7 +2101,7 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
llvm::SmallDenseMap<Value, int64_t> argIndexMapping;
for (size_t i = escapingValuesStartIdx;
i < escapingValuesStartIdx + escapingValues.size(); ++i) {
- innerWarpInput.push_back(newWarpOp.getResult(i));
+ innerWarpInput.push_back(newWarpOp.getResult(newIndices[i]));
argIndexMapping[escapingValues[i - escapingValuesStartIdx]] =
innerWarpInputType.size();
innerWarpInputType.push_back(
@@ -2146,20 +2137,11 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
if (!innerWarp.getResults().empty())
scf::YieldOp::create(rewriter, forOp.getLoc(), innerWarp.getResults());
- // Update the users of original `WarpOp` results that were coming from the
+ // Update the users of the new `WarpOp` results that were coming from the
// original `ForOp` to the corresponding new `ForOp` result.
for (auto [origIdx, newIdx] : forResultMapping)
- rewriter.replaceAllUsesExcept(warpOp.getResult(origIdx),
+ rewriter.replaceAllUsesExcept(newWarpOp.getResult(origIdx),
newForOp.getResult(newIdx), newForOp);
- // Similarly, update any users of the `WarpOp` results that were not
- // results of the `ForOp`.
- for (auto [origIdx, newIdx] : nonForResultMapping)
- rewriter.replaceAllUsesWith(warpOp.getResult(origIdx),
- newWarpOp.getResult(newIdx));
- // Remove the original `WarpOp` and `ForOp`, they should not have any uses
- // at this point.
- rewriter.eraseOp(forOp);
- rewriter.eraseOp(warpOp);
// Update any users of escaping values that were forwarded to the
// inner `WarpOp`. These values are now arguments of the inner `WarpOp`.
newForOp.walk([&](Operation *op) {
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
index 14639c5..fbae098 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorUnroll.cpp
@@ -465,26 +465,33 @@ struct UnrollElementwisePattern : public RewritePattern {
auto targetShape = getTargetShape(options, op);
if (!targetShape)
return failure();
+ int64_t targetShapeRank = targetShape->size();
auto dstVecType = cast<VectorType>(op->getResult(0).getType());
SmallVector<int64_t> originalSize =
*cast<VectorUnrollOpInterface>(op).getShapeForUnroll();
- // Bail-out if rank(source) != rank(target). The main limitation here is the
- // fact that `ExtractStridedSlice` requires the rank for the input and
- // output to match. If needed, we can relax this later.
- if (originalSize.size() != targetShape->size())
- return rewriter.notifyMatchFailure(
- op, "expected input vector rank to match target shape rank");
+ int64_t originalShapeRank = originalSize.size();
+
Location loc = op->getLoc();
+
+ // Handle rank mismatch by adding leading unit dimensions to targetShape
+ SmallVector<int64_t> adjustedTargetShape(originalShapeRank);
+ int64_t rankDiff = originalShapeRank - targetShapeRank;
+ std::fill(adjustedTargetShape.begin(),
+ adjustedTargetShape.begin() + rankDiff, 1);
+ std::copy(targetShape->begin(), targetShape->end(),
+ adjustedTargetShape.begin() + rankDiff);
+
+ int64_t adjustedTargetShapeRank = adjustedTargetShape.size();
// Prepare the result vector.
Value result = arith::ConstantOp::create(rewriter, loc, dstVecType,
rewriter.getZeroAttr(dstVecType));
- SmallVector<int64_t> strides(targetShape->size(), 1);
- VectorType newVecType =
+ SmallVector<int64_t> strides(adjustedTargetShapeRank, 1);
+ VectorType unrolledVecType =
VectorType::get(*targetShape, dstVecType.getElementType());
// Create the unrolled computation.
for (SmallVector<int64_t> offsets :
- StaticTileOffsetRange(originalSize, *targetShape)) {
+ StaticTileOffsetRange(originalSize, adjustedTargetShape)) {
SmallVector<Value> extractOperands;
for (OpOperand &operand : op->getOpOperands()) {
auto vecType = dyn_cast<VectorType>(operand.get().getType());
@@ -492,14 +499,31 @@ struct UnrollElementwisePattern : public RewritePattern {
extractOperands.push_back(operand.get());
continue;
}
- extractOperands.push_back(
- rewriter.createOrFold<vector::ExtractStridedSliceOp>(
- loc, operand.get(), offsets, *targetShape, strides));
+ Value extracted = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
+ loc, operand.get(), offsets, adjustedTargetShape, strides);
+
+ // Reshape to remove leading unit dims if needed
+ if (adjustedTargetShapeRank > targetShapeRank) {
+ extracted = rewriter.createOrFold<vector::ShapeCastOp>(
+ loc, VectorType::get(*targetShape, vecType.getElementType()),
+ extracted);
+ }
+ extractOperands.push_back(extracted);
}
+
Operation *newOp = cloneOpWithOperandsAndTypes(
- rewriter, loc, op, extractOperands, newVecType);
+ rewriter, loc, op, extractOperands, unrolledVecType);
+
+ Value computeResult = newOp->getResult(0);
+
+ // Use strides sized to targetShape for proper insertion
+ SmallVector<int64_t> insertStrides =
+ (adjustedTargetShapeRank > targetShapeRank)
+ ? SmallVector<int64_t>(targetShapeRank, 1)
+ : strides;
+
result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
- loc, newOp->getResult(0), result, offsets, strides);
+ loc, computeResult, result, offsets, insertStrides);
}
rewriter.replaceOp(op, result);
return success();
diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp
index 776b5c6..4d81918 100644
--- a/mlir/lib/IR/Diagnostics.cpp
+++ b/mlir/lib/IR/Diagnostics.cpp
@@ -378,8 +378,10 @@ struct SourceMgrDiagnosticHandlerImpl {
}
// Otherwise, try to load the source file.
- std::string ignored;
- unsigned id = mgr.AddIncludeFile(std::string(filename), SMLoc(), ignored);
+ auto bufferOrErr = llvm::MemoryBuffer::getFile(filename);
+ if (!bufferOrErr)
+ return 0;
+ unsigned id = mgr.AddNewSourceBuffer(std::move(*bufferOrErr), SMLoc());
filenameToBufId[filename] = id;
return id;
}
diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp
index 1fa04ed..89b81cf 100644
--- a/mlir/lib/IR/MLIRContext.cpp
+++ b/mlir/lib/IR/MLIRContext.cpp
@@ -121,6 +121,11 @@ namespace mlir {
class MLIRContextImpl {
public:
//===--------------------------------------------------------------------===//
+ // Remark
+ //===--------------------------------------------------------------------===//
+ std::unique_ptr<remark::detail::RemarkEngine> remarkEngine;
+
+ //===--------------------------------------------------------------------===//
// Debugging
//===--------------------------------------------------------------------===//
@@ -135,11 +140,6 @@ public:
DiagnosticEngine diagEngine;
//===--------------------------------------------------------------------===//
- // Remark
- //===--------------------------------------------------------------------===//
- std::unique_ptr<remark::detail::RemarkEngine> remarkEngine;
-
- //===--------------------------------------------------------------------===//
// Options
//===--------------------------------------------------------------------===//
@@ -357,7 +357,10 @@ MLIRContext::MLIRContext(const DialectRegistry &registry, Threading setting)
impl->affineUniquer.registerParametricStorageType<IntegerSetStorage>();
}
-MLIRContext::~MLIRContext() = default;
+MLIRContext::~MLIRContext() {
+ // finalize remark engine before destroying anything else.
+ impl->remarkEngine.reset();
+}
/// Copy the specified array of elements into memory managed by the provided
/// bump pointer allocator. This assumes the elements are all PODs.
diff --git a/mlir/lib/IR/Remarks.cpp b/mlir/lib/IR/Remarks.cpp
index a55f61a..031eae2 100644
--- a/mlir/lib/IR/Remarks.cpp
+++ b/mlir/lib/IR/Remarks.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/StringRef.h"
using namespace mlir::remark::detail;
-
+using namespace mlir::remark;
//------------------------------------------------------------------------------
// Remark
//------------------------------------------------------------------------------
@@ -70,7 +70,7 @@ static void printArgs(llvm::raw_ostream &os, llvm::ArrayRef<Remark::Arg> args) {
void Remark::print(llvm::raw_ostream &os, bool printLocation) const {
// Header: [Type] pass:remarkName
StringRef type = getRemarkTypeString();
- StringRef categoryName = getFullCategoryName();
+ StringRef categoryName = getCombinedCategoryName();
StringRef name = remarkName;
os << '[' << type << "] ";
@@ -81,9 +81,10 @@ void Remark::print(llvm::raw_ostream &os, bool printLocation) const {
os << "Function=" << getFunction() << " | ";
if (printLocation) {
- if (auto flc = mlir::dyn_cast<mlir::FileLineColLoc>(getLocation()))
+ if (auto flc = mlir::dyn_cast<mlir::FileLineColLoc>(getLocation())) {
os << " @" << flc.getFilename() << ":" << flc.getLine() << ":"
<< flc.getColumn();
+ }
}
printArgs(os, getArgs());
@@ -140,7 +141,7 @@ llvm::remarks::Remark Remark::generateRemark() const {
r.RemarkType = getRemarkType();
r.RemarkName = getRemarkName();
// MLIR does not use passes; instead, it has categories and sub-categories.
- r.PassName = getFullCategoryName();
+ r.PassName = getCombinedCategoryName();
r.FunctionName = getFunction();
r.Loc = locLambda();
for (const Remark::Arg &arg : getArgs()) {
@@ -225,26 +226,42 @@ InFlightRemark RemarkEngine::emitOptimizationRemarkAnalysis(Location loc,
// RemarkEngine
//===----------------------------------------------------------------------===//
-void RemarkEngine::report(const Remark &&remark) {
+void RemarkEngine::reportImpl(const Remark &remark) {
// Stream the remark
- if (remarkStreamer)
+ if (remarkStreamer) {
remarkStreamer->streamOptimizationRemark(remark);
+ }
// Print using MLIR's diagnostic
if (printAsEmitRemarks)
emitRemark(remark.getLocation(), remark.getMsg());
}
+void RemarkEngine::report(const Remark &&remark) {
+ if (remarkEmittingPolicy)
+ remarkEmittingPolicy->reportRemark(remark);
+}
+
RemarkEngine::~RemarkEngine() {
+ if (remarkEmittingPolicy)
+ remarkEmittingPolicy->finalize();
+
if (remarkStreamer)
remarkStreamer->finalize();
}
-llvm::LogicalResult
-RemarkEngine::initialize(std::unique_ptr<MLIRRemarkStreamerBase> streamer,
- std::string *errMsg) {
- // If you need to validate categories/filters, do so here and set errMsg.
+llvm::LogicalResult RemarkEngine::initialize(
+ std::unique_ptr<MLIRRemarkStreamerBase> streamer,
+ std::unique_ptr<RemarkEmittingPolicyBase> remarkEmittingPolicy,
+ std::string *errMsg) {
+
remarkStreamer = std::move(streamer);
+
+ auto reportFunc =
+ std::bind(&RemarkEngine::reportImpl, this, std::placeholders::_1);
+ remarkEmittingPolicy->initialize(ReportFn(std::move(reportFunc)));
+
+ this->remarkEmittingPolicy = std::move(remarkEmittingPolicy);
return success();
}
@@ -301,14 +318,15 @@ RemarkEngine::RemarkEngine(bool printAsEmitRemarks,
}
llvm::LogicalResult mlir::remark::enableOptimizationRemarks(
- MLIRContext &ctx,
- std::unique_ptr<remark::detail::MLIRRemarkStreamerBase> streamer,
- const remark::RemarkCategories &cats, bool printAsEmitRemarks) {
+ MLIRContext &ctx, std::unique_ptr<detail::MLIRRemarkStreamerBase> streamer,
+ std::unique_ptr<detail::RemarkEmittingPolicyBase> remarkEmittingPolicy,
+ const RemarkCategories &cats, bool printAsEmitRemarks) {
auto engine =
- std::make_unique<remark::detail::RemarkEngine>(printAsEmitRemarks, cats);
+ std::make_unique<detail::RemarkEngine>(printAsEmitRemarks, cats);
std::string errMsg;
- if (failed(engine->initialize(std::move(streamer), &errMsg))) {
+ if (failed(engine->initialize(std::move(streamer),
+ std::move(remarkEmittingPolicy), &errMsg))) {
llvm::report_fatal_error(
llvm::Twine("Failed to initialize remark engine. Error: ") + errMsg);
}
@@ -316,3 +334,12 @@ llvm::LogicalResult mlir::remark::enableOptimizationRemarks(
return success();
}
+
+//===----------------------------------------------------------------------===//
+// Remark emitting policies
+//===----------------------------------------------------------------------===//
+
+namespace mlir::remark {
+RemarkEmittingPolicyAll::RemarkEmittingPolicyAll() = default;
+RemarkEmittingPolicyFinal::RemarkEmittingPolicyFinal() = default;
+} // namespace mlir::remark
diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index 388de1c..f96af02 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LLVM_OPTIONAL_SOURCES
FunctionInterfaces.cpp
IndexingMapOpInterface.cpp
InferIntRangeInterface.cpp
+ InferStridedMetadataInterface.cpp
InferTypeOpInterface.cpp
LoopLikeInterface.cpp
MemOpInterfaces.cpp
@@ -64,6 +65,21 @@ add_mlir_library(MLIRFunctionInterfaces
add_mlir_interface_library(IndexingMapOpInterface)
add_mlir_interface_library(InferIntRangeInterface)
+
+add_mlir_library(MLIRInferStridedMetadataInterface
+ InferStridedMetadataInterface.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces
+
+ DEPENDS
+ MLIRInferStridedMetadataInterfaceIncGen
+
+ LINK_LIBS PUBLIC
+ MLIRInferIntRangeInterface
+ MLIRIR
+)
+
add_mlir_interface_library(InferTypeOpInterface)
add_mlir_library(MLIRLoopLikeInterface
diff --git a/mlir/lib/Interfaces/InferIntRangeInterface.cpp b/mlir/lib/Interfaces/InferIntRangeInterface.cpp
index 9f3e97d..84fc9b8 100644
--- a/mlir/lib/Interfaces/InferIntRangeInterface.cpp
+++ b/mlir/lib/Interfaces/InferIntRangeInterface.cpp
@@ -146,6 +146,25 @@ raw_ostream &mlir::operator<<(raw_ostream &os, const IntegerValueRange &range) {
return os;
}
+SmallVector<IntegerValueRange>
+mlir::getIntValueRanges(ArrayRef<OpFoldResult> values,
+ GetIntRangeFn getIntRange, int32_t indexBitwidth) {
+ SmallVector<IntegerValueRange> ranges;
+ ranges.reserve(values.size());
+ for (OpFoldResult ofr : values) {
+ if (auto value = dyn_cast<Value>(ofr)) {
+ ranges.push_back(getIntRange(value));
+ continue;
+ }
+
+ // Create a constant range.
+ auto attr = cast<IntegerAttr>(cast<Attribute>(ofr));
+ ranges.emplace_back(ConstantIntRanges::constant(
+ attr.getValue().sextOrTrunc(indexBitwidth)));
+ }
+ return ranges;
+}
+
void mlir::intrange::detail::defaultInferResultRanges(
InferIntRangeInterface interface, ArrayRef<IntegerValueRange> argRanges,
SetIntLatticeFn setResultRanges) {
diff --git a/mlir/lib/Interfaces/InferStridedMetadataInterface.cpp b/mlir/lib/Interfaces/InferStridedMetadataInterface.cpp
new file mode 100644
index 0000000..483e9f1
--- /dev/null
+++ b/mlir/lib/Interfaces/InferStridedMetadataInterface.cpp
@@ -0,0 +1,36 @@
+//===- InferStridedMetadataInterface.cpp - Strided md inference interface -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/InferStridedMetadataInterface.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/TypeUtilities.h"
+#include <optional>
+
+using namespace mlir;
+
+#include "mlir/Interfaces/InferStridedMetadataInterface.cpp.inc"
+
+void StridedMetadataRange::print(raw_ostream &os) const {
+ if (isUninitialized()) {
+ os << "strided_metadata<None>";
+ return;
+ }
+ os << "strided_metadata<offset = [";
+ llvm::interleaveComma(*offsets, os, [&](const ConstantIntRanges &range) {
+ os << "{" << range << "}";
+ });
+ os << "], sizes = [";
+ llvm::interleaveComma(sizes, os, [&](const ConstantIntRanges &range) {
+ os << "{" << range << "}";
+ });
+ os << "], strides = [";
+ llvm::interleaveComma(strides, os, [&](const ConstantIntRanges &range) {
+ os << "{" << range << "}";
+ });
+ os << "]>";
+}
diff --git a/mlir/lib/Remark/RemarkStreamer.cpp b/mlir/lib/Remark/RemarkStreamer.cpp
index d213a1a..bf36286 100644
--- a/mlir/lib/Remark/RemarkStreamer.cpp
+++ b/mlir/lib/Remark/RemarkStreamer.cpp
@@ -60,6 +60,7 @@ void LLVMRemarkStreamer::finalize() {
namespace mlir::remark {
LogicalResult enableOptimizationRemarksWithLLVMStreamer(
MLIRContext &ctx, StringRef path, llvm::remarks::Format fmt,
+ std::unique_ptr<detail::RemarkEmittingPolicyBase> remarkEmittingPolicy,
const RemarkCategories &cat, bool printAsEmitRemarks) {
FailureOr<std::unique_ptr<detail::MLIRRemarkStreamerBase>> sOr =
@@ -67,7 +68,8 @@ LogicalResult enableOptimizationRemarksWithLLVMStreamer(
if (failed(sOr))
return failure();
- return remark::enableOptimizationRemarks(ctx, std::move(*sOr), cat,
+ return remark::enableOptimizationRemarks(ctx, std::move(*sOr),
+ std::move(remarkEmittingPolicy), cat,
printAsEmitRemarks);
}
diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.cpp b/mlir/lib/Target/LLVMIR/DebugImporter.cpp
index 4bbcd8e..db39c70 100644
--- a/mlir/lib/Target/LLVMIR/DebugImporter.cpp
+++ b/mlir/lib/Target/LLVMIR/DebugImporter.cpp
@@ -34,11 +34,9 @@ Location DebugImporter::translateFuncLocation(llvm::Function *func) {
return UnknownLoc::get(context);
// Add a fused location to link the subprogram information.
- StringAttr funcName = StringAttr::get(context, subprogram->getName());
StringAttr fileName = StringAttr::get(context, subprogram->getFilename());
return FusedLocWith<DISubprogramAttr>::get(
- {NameLoc::get(funcName),
- FileLineColLoc::get(fileName, subprogram->getLine(), /*column=*/0)},
+ {FileLineColLoc::get(fileName, subprogram->getLine(), /*column=*/0)},
translate(subprogram), context);
}
diff --git a/mlir/lib/Target/Wasm/TranslateFromWasm.cpp b/mlir/lib/Target/Wasm/TranslateFromWasm.cpp
index 132be4e..51c6077 100644
--- a/mlir/lib/Target/Wasm/TranslateFromWasm.cpp
+++ b/mlir/lib/Target/Wasm/TranslateFromWasm.cpp
@@ -956,7 +956,7 @@ inline parsed_inst_t ExpressionParser::buildNumericOp(
<< ", type = " << ty << " ***";
auto tysToPop = SmallVector<Type, numOperands>();
tysToPop.resize(numOperands);
- std::fill(tysToPop.begin(), tysToPop.end(), ty);
+ llvm::fill(tysToPop, ty);
auto operands = popOperands(tysToPop);
if (failed(operands))
return failure();
diff --git a/mlir/lib/Tools/PDLL/Parser/Parser.cpp b/mlir/lib/Tools/PDLL/Parser/Parser.cpp
index c883baa..3236b4f 100644
--- a/mlir/lib/Tools/PDLL/Parser/Parser.cpp
+++ b/mlir/lib/Tools/PDLL/Parser/Parser.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Parser.h"
#include <optional>
@@ -828,6 +829,7 @@ LogicalResult Parser::parseTdInclude(StringRef filename, llvm::SMRange fileLoc,
llvm::SourceMgr tdSrcMgr;
tdSrcMgr.AddNewSourceBuffer(std::move(*includeBuffer), SMLoc());
tdSrcMgr.setIncludeDirs(parserSrcMgr.getIncludeDirs());
+ tdSrcMgr.setVirtualFileSystem(llvm::vfs::getRealFileSystem());
// This class provides a context argument for the llvm::SourceMgr diagnostic
// handler.
diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
index 30fd384..9ef405d 100644
--- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
+++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp
@@ -37,6 +37,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/LogicalResult.h"
#include "llvm/Support/ManagedStatic.h"
@@ -226,6 +227,18 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig {
"bitstream", "Print bitstream file")),
llvm::cl::cat(remarkCategory)};
+ static llvm::cl::opt<RemarkPolicy, /*ExternalStorage=*/true> remarkPolicy{
+ "remark-policy",
+ llvm::cl::desc("Specify the policy for remark output."),
+ cl::location(remarkPolicyFlag),
+ llvm::cl::value_desc("format"),
+ llvm::cl::init(RemarkPolicy::REMARK_POLICY_ALL),
+ llvm::cl::values(clEnumValN(RemarkPolicy::REMARK_POLICY_ALL, "all",
+ "Print all remarks"),
+ clEnumValN(RemarkPolicy::REMARK_POLICY_FINAL, "final",
+ "Print final remarks")),
+ llvm::cl::cat(remarkCategory)};
+
static cl::opt<std::string, /*ExternalStorage=*/true> remarksAll(
"remarks-filter",
cl::desc("Show all remarks: passed, missed, failed, analysis"),
@@ -517,18 +530,28 @@ performActions(raw_ostream &os,
return failure();
context->enableMultithreading(wasThreadingEnabled);
-
+ // Set the remark categories and policy.
remark::RemarkCategories cats{
config.getRemarksAllFilter(), config.getRemarksPassedFilter(),
config.getRemarksMissedFilter(), config.getRemarksAnalyseFilter(),
config.getRemarksFailedFilter()};
mlir::MLIRContext &ctx = *context;
+ // Helper to create the appropriate policy based on configuration
+ auto createPolicy = [&config]()
+ -> std::unique_ptr<mlir::remark::detail::RemarkEmittingPolicyBase> {
+ if (config.getRemarkPolicy() == RemarkPolicy::REMARK_POLICY_ALL)
+ return std::make_unique<mlir::remark::RemarkEmittingPolicyAll>();
+ if (config.getRemarkPolicy() == RemarkPolicy::REMARK_POLICY_FINAL)
+ return std::make_unique<mlir::remark::RemarkEmittingPolicyFinal>();
+
+ llvm_unreachable("Invalid remark policy");
+ };
switch (config.getRemarkFormat()) {
case RemarkFormat::REMARK_FORMAT_STDOUT:
if (failed(mlir::remark::enableOptimizationRemarks(
- ctx, nullptr, cats, true /*printAsEmitRemarks*/)))
+ ctx, nullptr, createPolicy(), cats, true /*printAsEmitRemarks*/)))
return failure();
break;
@@ -537,7 +560,7 @@ performActions(raw_ostream &os,
? "mlir-remarks.yaml"
: config.getRemarksOutputFile();
if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer(
- ctx, file, llvm::remarks::Format::YAML, cats)))
+ ctx, file, llvm::remarks::Format::YAML, createPolicy(), cats)))
return failure();
break;
}
@@ -547,7 +570,7 @@ performActions(raw_ostream &os,
? "mlir-remarks.bitstream"
: config.getRemarksOutputFile();
if (failed(mlir::remark::enableOptimizationRemarksWithLLVMStreamer(
- ctx, file, llvm::remarks::Format::Bitstream, cats)))
+ ctx, file, llvm::remarks::Format::Bitstream, createPolicy(), cats)))
return failure();
break;
}
@@ -593,6 +616,12 @@ performActions(raw_ostream &os,
AsmState asmState(op.get(), OpPrintingFlags(), /*locationMap=*/nullptr,
&fallbackResourceMap);
os << OpWithState(op.get(), asmState) << '\n';
+
+ // This is required if the remark policy is final. Otherwise, the remarks are
+ // not emitted.
+ if (remark::detail::RemarkEngine *engine = ctx.getRemarkEngine())
+ engine->getRemarkEmittingPolicy()->finalize();
+
return success();
}
diff --git a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
index 60b9567..1dbe7eca 100644
--- a/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
+++ b/mlir/lib/Tools/mlir-pdll-lsp-server/PDLLServer.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LSP/Logging.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include <optional>
using namespace mlir;
@@ -402,6 +403,7 @@ PDLDocument::PDLDocument(const llvm::lsp::URIForFile &uri, StringRef contents,
llvm::append_range(includeDirs, extraDirs);
sourceMgr.setIncludeDirs(includeDirs);
+ sourceMgr.setVirtualFileSystem(llvm::vfs::getRealFileSystem());
sourceMgr.AddNewSourceBuffer(std::move(memBuffer), SMLoc());
astContext.getDiagEngine().setHandlerFn([&](const ast::Diagnostic &diag) {
diff --git a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp
index 3080b78..2d817be 100644
--- a/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp
+++ b/mlir/lib/Tools/tblgen-lsp-server/TableGenServer.cpp
@@ -17,6 +17,7 @@
#include "llvm/Support/LSP/Logging.h"
#include "llvm/Support/LSP/Protocol.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TableGen/Parser.h"
#include "llvm/TableGen/Record.h"
#include <optional>
@@ -448,6 +449,7 @@ void TableGenTextFile::initialize(
return;
}
sourceMgr.setIncludeDirs(includeDirs);
+ sourceMgr.setVirtualFileSystem(llvm::vfs::getRealFileSystem());
sourceMgr.AddNewSourceBuffer(std::move(memBuffer), SMLoc());
// This class provides a context argument for the SourceMgr diagnostic
diff --git a/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp b/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp
index 111f58e..5f3b04a 100644
--- a/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp
@@ -66,7 +66,9 @@ size_t mlir::moveLoopInvariantCode(
size_t numMoved = 0;
for (Region *region : regions) {
- LDBG() << "Original loop:\n" << *region->getParentOp();
+ LDBG() << "Original loop:\n"
+ << OpWithFlags(region->getParentOp(),
+ OpPrintingFlags().skipRegions());
std::queue<Operation *> worklist;
// Add top-level operations in the loop body to the worklist.
@@ -90,7 +92,8 @@ size_t mlir::moveLoopInvariantCode(
!canBeHoisted(op, definedOutside))
continue;
- LDBG() << "Moving loop-invariant op: " << *op;
+ LDBG() << "Moving loop-invariant op: "
+ << OpWithFlags(op, OpPrintingFlags().skipRegions());
moveOutOfRegion(op, region);
++numMoved;
@@ -111,9 +114,7 @@ size_t mlir::moveLoopInvariantCode(LoopLikeOpInterface loopLike) {
[&](Value value, Region *) {
return loopLike.isDefinedOutsideOfLoop(value);
},
- [&](Operation *op, Region *) {
- return isMemoryEffectFree(op) && isSpeculatable(op);
- },
+ [&](Operation *op, Region *) { return isPure(op); },
[&](Operation *op, Region *) { loopLike.moveOutOfLoop(op); });
}
diff --git a/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir
new file mode 100644
index 0000000..808c1c2
--- /dev/null
+++ b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir
@@ -0,0 +1,67 @@
+// RUN: mlir-opt -test-strided-metadata-range-analysis %s 2>&1 | FileCheck %s
+
+func.func @memref_subview(%arg0: memref<8x16x4xf32, strided<[64, 4, 1]>>, %arg1: memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>>, %arg2: memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>>, %arg3: index, %arg4: index, %arg5: index) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
+ %0 = test.with_bounds {smax = 13 : index, smin = 11 : index, umax = 13 : index, umin = 11 : index} : index
+ %1 = test.with_bounds {smax = 7 : index, smin = 5 : index, umax = 7 : index, umin = 5 : index} : index
+
+ // Test subview with unknown sizes, and constant offsets and strides.
+ // CHECK: Op: %[[SV0:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [1, 1] signed : [1, 1]}]
+ // CHECK-SAME: sizes = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: strides = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [4, 4] signed : [4, 4]}, {unsigned : [1, 1] signed : [1, 1]}]
+ %subview = memref.subview %arg0[%c0, %c0, %c1] [%arg3, %arg4, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[64, 4, 1]>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+
+ // Test a subview of a subview, with bounded dynamic offsets.
+ // CHECK: Op: %[[SV1:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [346, 484] signed : [346, 484]}]
+ // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}]
+ // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}]
+ %subview_0 = memref.subview %subview[%1, %1, %1] [%c2, %c2, %c2] [%0, %0, %0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+
+ // Test a subview of a subview, with constant operands.
+ // CHECK: Op: %[[SV2:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [368, 510] signed : [368, 510]}]
+ // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}]
+ // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}]
+ %subview_1 = memref.subview %subview_0[%c0, %c0, %c2] [%c2, %c2, %c2] [%c1, %c1, %c1] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+
+ // Test a rank-reducing subview.
+ // CHECK: Op: %[[SV3:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: sizes = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [16, 16] signed : [16, 16]}]
+ // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ %subview_2 = memref.subview %arg1[%arg4, %arg4, %arg4, %arg4, %arg4] [1, 64, 1, 16, 1] [%arg5, %arg5, %arg5, %arg5, %arg5] : memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>> to memref<64x16xf32, strided<[?, ?], offset: ?>>
+
+ // Test a subview of a rank-reducing subview
+ // CHECK: Op: %[[SV4:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: sizes = [{unsigned : [5, 7] signed : [5, 7]}]
+ // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ %subview_3 = memref.subview %subview_2[%c0, %0] [1, %1] [%c1, %c2] : memref<64x16xf32, strided<[?, ?], offset: ?>> to memref<?xf32, strided<[?], offset: ?>>
+
+ // Test a subview with mixed bounded and unbound dynamic sizes.
+ // CHECK: Op: %[[SV5:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [32, 32] signed : [32, 32]}]
+ // CHECK-SAME: sizes = [{unsigned : [11, 13] signed : [11, 13]}, {unsigned : [5, 7] signed : [5, 7]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: strides = [{unsigned : [1, 1] signed : [1, 1]}, {unsigned : [64, 64] signed : [64, 64]}, {unsigned : [8, 8] signed : [8, 8]}]
+ %subview_4 = memref.subview %arg2[%c0, %c0, %c2] [%0, %1, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+ return
+}
+
+// CHECK: func.func @memref_subview
+// CHECK: %[[A0:.*]]: memref<8x16x4xf32, strided<[64, 4, 1]>>
+// CHECK: %[[SV0]] = memref.subview %[[A0]]
+// CHECK-NEXT: %[[SV1]] = memref.subview
+// CHECK-NEXT: %[[SV2]] = memref.subview
+// CHECK-NEXT: %[[SV3]] = memref.subview
+// CHECK-NEXT: %[[SV4]] = memref.subview
+// CHECK-NEXT: %[[SV5]] = memref.subview
diff --git a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
index 2492ada..82426c4 100644
--- a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
+++ b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
@@ -1,6 +1,7 @@
// RUN: mlir-opt %s -gpu-module-to-binary="format=isa" \
// RUN: -debug-only=serialize-to-isa 2> %t
// RUN: FileCheck --input-file=%t %s
+// REQUIRES: asserts
//
// MathToXeVM pass generates OpenCL intrinsics function calls when converting
// Math ops with `fastmath` attr to native function calls. It is assumed that
diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
index 0b150e9..9c552d8 100644
--- a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
+++ b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
@@ -14,19 +14,36 @@ gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>)
// CHECK: %[[VAR4:.*]] = arith.addi %[[ARG0]], %[[VAR3]] : i64
// CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR4]] : i64 to !llvm.ptr<1>
// CHECK: %[[VAR6:.*]] = scf.if %[[VAR2]] -> (f16) {
- // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> vector<1xf16>
- // CHECK: %[[VAR8:.*]] = vector.extract %[[VAR7]][0] : f16 from vector<1xf16>
- // CHECK: scf.yield %[[VAR8]] : f16
- // CHECK: } else {
- // CHECK: %[[CST_0:.*]] = arith.constant dense<0.000000e+00> : vector<1xf16>
- // CHECK: %[[VAR7:.*]] = vector.extract %[[CST_0]][0] : f16 from vector<1xf16>
+ // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> f16
// CHECK: scf.yield %[[VAR7]] : f16
+ // CHECK: } else {
+ // CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f16
+ // CHECK: scf.yield %[[CST_0]] : f16
// CHECK: }
%3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
: i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
gpu.return
}
}
+
+// -----
+gpu.module @test {
+// CHECK-LABEL: @source_materialize_single_elem_vec
+// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>, %[[ARG2:.*]]: memref<1xf16>
+gpu.func @source_materialize_single_elem_vec(%src: i64, %offset: vector<1xindex>, %dst: memref<1xf16>) {
+ %1 = arith.constant dense<1>: vector<1xi1>
+ %3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+ : i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
+ // CHECK: %[[VAR_IF:.*]] = scf.if
+ // CHECK: %[[VAR_RET:.*]] = vector.broadcast %[[VAR_IF]] : f16 to vector<1xf16>
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: vector.store %[[VAR_RET]], %[[ARG2]][%[[C0]]] : memref<1xf16>, vector<1xf16>
+ %c0 = arith.constant 0 : index
+ vector.store %3, %dst[%c0] : memref<1xf16>, vector<1xf16>
+ gpu.return
+}
+}
+
// -----
gpu.module @test {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
index c58b153..21b508e 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
@@ -65,13 +65,13 @@ func.func @main(%t: tensor<?xf32>, %sz: index, %idx: index) -> (f32, f32) {
// -----
-func.func @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> {
+func.func private @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> {
func.return %A : tensor<?xf32>
}
-// CHECK-LABEL: func @return_arg
+// CHECK-LABEL: func private @return_arg
// CHECK-SAME: %[[A:.*]]: memref<?xf32
// CHECK-NOT: return %[[A]]
-// NO-DROP-LABEL: func @return_arg
+// NO-DROP-LABEL: func private @return_arg
// NO-DROP-SAME: %[[A:.*]]: memref<?xf32
// NO-DROP: return %[[A]]
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
index 6054a61..d5f834b 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
@@ -171,9 +171,9 @@ func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
// Bufferization of a function that is reading and writing. %t0 is writable, so
// no copy should be inserted.
-// CHECK-LABEL: func @inner_func(
+// CHECK-LABEL: func private @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
+func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-NOT: copy
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
@@ -186,9 +186,9 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
return %0, %1 : tensor<?xf32>, f32
}
-// CHECK-LABEL: func @call_func_with_non_tensor_return(
+// CHECK-LABEL: func private @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @call_func_with_non_tensor_return(
+func.func private @call_func_with_non_tensor_return(
%t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
// CHECK-NOT: alloc
// CHECK-NOT: copy
@@ -203,9 +203,9 @@ func.func @call_func_with_non_tensor_return(
// Bufferization of a function that is reading and writing. %t0 is not writable,
// so a copy is needed.
-// CHECK-LABEL: func @inner_func(
+// CHECK-LABEL: func private @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
+func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-NOT: copy
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
@@ -276,10 +276,10 @@ func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
// This function does not read, just write. We need an alloc, but no copy.
-// CHECK-LABEL: func @does_not_read(
+// CHECK-LABEL: func private @does_not_read(
// CHECK-NOT: alloc
// CHECK-NOT: copy
-func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
+func.func private @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
%f0 = arith.constant 0.0 : f32
%r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
return %r : tensor<?xf32>
@@ -354,9 +354,9 @@ func.func @main() {
// A write inside an scf.execute_region. An equivalent tensor is yielded.
-// CHECK-LABEL: func @execute_region_test(
+// CHECK-LABEL: func private @execute_region_test(
// CHECK-SAME: %[[m1:.*]]: memref<?xf32
-func.func @execute_region_test(%t1 : tensor<?xf32>)
+func.func private @execute_region_test(%t1 : tensor<?xf32>)
-> (f32, tensor<?xf32>, f32)
{
%f1 = arith.constant 0.0 : f32
@@ -397,11 +397,11 @@ func.func @no_inline_execute_region_not_canonicalized() {
// CHECK: func private @some_external_func(memref<?xf32, strided<[?], offset: ?>>)
func.func private @some_external_func(tensor<?xf32>)
-// CHECK: func @scf_for_with_tensor_insert_slice(
+// CHECK: func private @scf_for_with_tensor_insert_slice(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
-func.func @scf_for_with_tensor_insert_slice(
+func.func private @scf_for_with_tensor_insert_slice(
%A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>,
%lb : index, %ub : index, %step : index)
-> (tensor<?xf32>, tensor<?xf32>)
@@ -456,11 +456,11 @@ func.func @bar(
// -----
-// CHECK: func @init_and_dot(
+// CHECK: func private @init_and_dot(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<f32, strided<[], offset: ?>>
-func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
+func.func private @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
// CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32
%v0 = arith.constant 0.0 : f32
@@ -574,9 +574,9 @@ func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i
// No alloc or copy inside of the loop.
-// CHECK-LABEL: func @inner_func(
+// CHECK-LABEL: func private @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
+func.func private @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
// CHECK: memref.store %{{.*}}, %[[arg0]]
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
index e2ab876..b52612d 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
@@ -24,10 +24,46 @@
// CHECK-NOT: copy
// CHECK: %[[call:.*]]:2 = call @inner_func(%[[arg0]])
%0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
- // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32,{{.*}}>
+ // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32{{.*}}>
return %1, %0 : f32, tensor<?xf32>
}
"test.finish" () : () -> ()
}) : () -> ()
+// -----
+#enc1 = #test.tensor_encoding<"hello">
+#enc2 = #test.tensor_encoding<"not hello">
+
+"test.symbol_scope_isolated"() ({
+ // CHECK: func @inner_func(
+ // CHECK-SAME: %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>)
+ // CHECK-SAME: -> memref<?xf32, #test.memref_layout<"hello">>
+ func.func @inner_func(%t: tensor<?xf32, #enc1>)
+ -> tensor<?xf32, #enc1> {
+ // CHECK: return %[[arg0]]
+ return %t : tensor<?xf32, #enc1>
+ }
+
+ // CHECK: func @outer_func(
+ // CHECK-SAME: %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>)
+ // CHECK-SAME: -> (memref<?xf32, #test.memref_layout<"hello">>,
+ // CHECK-SAME: memref<?xf32, #test.memref_layout<"not hello">>)
+ func.func @outer_func(%t0: tensor<?xf32, #enc1>)
+ -> (tensor<?xf32, #enc1>, tensor<?xf32, #enc2>) {
+ // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
+ %0 = call @inner_func(%t0)
+ : (tensor<?xf32, #enc1>) -> (tensor<?xf32, #enc1>)
+
+ // CHECK: %[[local:.*]] = "test.create_memref_op"() : ()
+ // CHECK-SAME: -> memref<?xf32, #test.memref_layout<"not hello">>
+ %local = "test.create_tensor_op"() : () -> tensor<?xf32, #enc2>
+ // CHECK: %[[dummy:.*]] = "test.dummy_memref_op"(%[[local]])
+ %1 = "test.dummy_tensor_op"(%local) : (tensor<?xf32, #enc2>)
+ -> tensor<?xf32, #enc2>
+
+ // CHECK: return %[[call]], %[[dummy]]
+ return %0, %1 : tensor<?xf32, #enc1>, tensor<?xf32, #enc2>
+ }
+ "test.finish" () : () -> ()
+}) : () -> ()
diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
index 618ba34..66cae5c 100644
--- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
+++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
@@ -1011,6 +1011,20 @@ module attributes { transform.target_tag = "start_here" } {
} -> tensor<1x1x4xf32>
return
}
+
+ func.func @generic_none(%arg0: tensor<128x128xi32>, %arg1: tensor<128x128xi32>, %arg2: tensor<128x128xi32>) {
+ %0 = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
+ affine_map<(d0, d1, d2) -> (d2, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%arg0, %arg1 : tensor<128x128xi32>, tensor<128x128xi32>)
+ outs(%arg2 : tensor<128x128xi32>) {
+ ^bb0(%in: i32, %in_0: i32, %out: i32):
+ linalg.yield %out : i32
+ } -> tensor<128x128xi32>
+ return
+ }
}
// -----
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
index 9616a3e..1df15e8 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -10,10 +10,10 @@
// TODO: Some test cases from this file should be moved to other dialects.
-// CHECK-LABEL: func @fill_inplace(
+// CHECK-LABEL: func private @fill_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
-// CHECK-NO-LAYOUT-MAP-LABEL: func @fill_inplace(%{{.*}}: memref<?xf32>) {
-func.func @fill_inplace(
+// CHECK-NO-LAYOUT-MAP-LABEL: func private @fill_inplace(%{{.*}}: memref<?xf32>) {
+func.func private @fill_inplace(
%A : tensor<?xf32> {bufferization.writable = true})
-> tensor<?xf32>
{
@@ -56,10 +56,10 @@ func.func @not_inplace(
// -----
-// CHECK-LABEL: func @not_inplace
+// CHECK-LABEL: func private @not_inplace
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?x?xf32, strided<[?, ?], offset: ?>>) {
-// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?x?xf32>) {
-func.func @not_inplace(
+// CHECK-NO-LAYOUT-MAP-LABEL: func private @not_inplace(%{{.*}}: memref<?x?xf32>) {
+func.func private @not_inplace(
%A : tensor<?x?xf32> {bufferization.writable = true})
-> tensor<?x?xf32>
{
@@ -235,7 +235,7 @@ func.func @dominance_violation_bug_1(
// -----
-func.func @gather_like(
+func.func private @gather_like(
%arg0 : tensor<?x?xf32> {bufferization.writable = false},
%arg1 : tensor<?xi32> {bufferization.writable = false},
%arg2 : tensor<?x?xf32> {bufferization.writable = true})
@@ -254,7 +254,7 @@ func.func @gather_like(
} -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: func @gather_like(
+// CHECK-LABEL: func private @gather_like(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?xf32,
// CHECK-SAME: %[[ARG1:.+]]: memref<?xi32
// CHECK-SAME: %[[ARG2:.+]]: memref<?x?xf32
diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir
new file mode 100644
index 0000000..35355c6
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir
@@ -0,0 +1,102 @@
+// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=firstprivate})" | FileCheck %s
+
+// CHECK: acc.firstprivate.recipe @firstprivate_scalar : memref<f32> init {
+// CHECK: ^bb0(%{{.*}}: memref<f32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32>
+// CHECK: acc.yield %[[ALLOC]] : memref<f32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<f32>, %[[DST:.*]]: memref<f32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<f32> to memref<f32>
+// CHECK: acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar() {
+ %0 = memref.alloca() {test.var = "scalar"} : memref<f32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_static_2d : memref<10x20xf32> init {
+// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<10x20xf32>, %[[DST:.*]]: memref<10x20xf32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x20xf32> to memref<10x20xf32>
+// CHECK: acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_static_2d() {
+ %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_dynamic_2d : memref<?x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>):
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32>
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<?x?xf32>, %[[DST:.*]]: memref<?x?xf32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<?x?xf32> to memref<?x?xf32>
+// CHECK: acc.terminator
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_dynamic_2d(%arg0: index, %arg1: index) {
+ %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_mixed_dims : memref<10x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>):
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<10x?xf32>, %[[DST:.*]]: memref<10x?xf32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x?xf32> to memref<10x?xf32>
+// CHECK: acc.terminator
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_mixed_dims(%arg0: index) {
+ %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_scalar_int : memref<i32> init {
+// CHECK: ^bb0(%{{.*}}: memref<i32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32>
+// CHECK: acc.yield %[[ALLOC]] : memref<i32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<i32>, %[[DST:.*]]: memref<i32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<i32> to memref<i32>
+// CHECK: acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar_int() {
+ %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32>
+ return
+}
+
diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir
new file mode 100644
index 0000000..8403ee8
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir
@@ -0,0 +1,82 @@
+// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=private})" | FileCheck %s
+
+// CHECK: acc.private.recipe @private_scalar : memref<f32> init {
+// CHECK: ^bb0(%{{.*}}: memref<f32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32>
+// CHECK: acc.yield %[[ALLOC]] : memref<f32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar() {
+ %0 = memref.alloca() {test.var = "scalar"} : memref<f32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_static_2d : memref<10x20xf32> init {
+// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_static_2d() {
+ %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_dynamic_2d : memref<?x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>):
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32>
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_dynamic_2d(%arg0: index, %arg1: index) {
+ %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_mixed_dims : memref<10x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>):
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_mixed_dims(%arg0: index) {
+ %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_scalar_int : memref<i32> init {
+// CHECK: ^bb0(%{{.*}}: memref<i32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32>
+// CHECK: acc.yield %[[ALLOC]] : memref<i32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar_int() {
+ %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32>
+ return
+}
+
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
index a1067ec..af09dc8 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -8,11 +8,11 @@
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
-// CHECK-LABEL: func @scf_for_yield_only(
+// CHECK-LABEL: func private @scf_for_yield_only(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: ) -> memref<?xf32> {
-func.func @scf_for_yield_only(
+func.func private @scf_for_yield_only(
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%lb : index, %ub : index, %step : index)
@@ -85,11 +85,11 @@ func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true},
// -----
-// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
+// CHECK-LABEL: func private @scf_for_with_tensor.insert_slice
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
-func.func @scf_for_with_tensor.insert_slice(
+func.func private @scf_for_with_tensor.insert_slice(
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32> {bufferization.writable = false},
@@ -471,11 +471,11 @@ func.func @scf_while_iter_arg_result_mismatch(%arg0: tensor<5xi1>,
// -----
-// CHECK-LABEL: func.func @parallel_insert_slice_no_conflict(
+// CHECK-LABEL: func private @parallel_insert_slice_no_conflict(
// CHECK-SAME: %[[idx:.*]]: index, %[[idx2:.*]]: index,
// CHECK-SAME: %[[arg1:.*]]: memref<?xf32, strided{{.*}}>,
// CHECK-SAME: %[[arg2:.*]]: memref<?xf32, strided{{.*}}>
-func.func @parallel_insert_slice_no_conflict(
+func.func private @parallel_insert_slice_no_conflict(
%idx: index,
%idx2: index,
%arg1: tensor<?xf32> {bufferization.writable = true},
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
index 5f95da2..b6c72be 100644
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -8,12 +8,12 @@
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
-// CHECK-LABEL: func @insert_slice_fun
+// CHECK-LABEL: func private @insert_slice_fun
// CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
-func.func @insert_slice_fun(
+func.func private @insert_slice_fun(
%A0 : tensor<?xf32> {bufferization.writable = false},
%A1 : tensor<?xf32> {bufferization.writable = true},
%t0 : tensor<4xf32> {bufferization.writable = false},
@@ -331,12 +331,12 @@ func.func @dim_not_reading(%t: tensor<?xf32>, %f: f32, %pos: index)
// -----
// CHECK: #[[$map:.*]] = affine_map<(d0) -> (d0 + 5)>
-// CHECK-LABEL: func.func @cast_retains_buffer_layout(
+// CHECK-LABEL: func.func private @cast_retains_buffer_layout(
// CHECK-SAME: %[[t:.*]]: memref<?xf32, #[[$map]]>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> {
// CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, #[[$map]]> to memref<10xf32, #[[$map]]>
// CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, #[[$map]]> to memref<?xf32, strided<[1], offset: 7>>
// CHECK: return %[[slice]]
-func.func @cast_retains_buffer_layout(
+func.func private @cast_retains_buffer_layout(
%t: tensor<?xf32>
{bufferization.buffer_layout = affine_map<(d0) -> (d0 + 5)>},
%sz: index)
@@ -353,12 +353,12 @@ func.func @cast_retains_buffer_layout(
// -----
-// CHECK-LABEL: func.func @cast_retains_buffer_layout_strided(
+// CHECK-LABEL: func private @cast_retains_buffer_layout_strided(
// CHECK-SAME: %[[t:.*]]: memref<?xf32, strided<[1], offset: 5>>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> {
// CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, strided<[1], offset: 5>> to memref<10xf32, strided<[1], offset: 5>>
// CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, strided<[1], offset: 5>> to memref<?xf32, strided<[1], offset: 7>>
// CHECK: return %[[slice]]
-func.func @cast_retains_buffer_layout_strided(
+func.func private @cast_retains_buffer_layout_strided(
%t: tensor<?xf32>
{bufferization.buffer_layout = strided<[1], offset: 5>},
%sz: index)
diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir
new file mode 100644
index 0000000..023a0e5
--- /dev/null
+++ b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir
@@ -0,0 +1,311 @@
+// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file | FileCheck %s
+
+///===----------------------------------------------===//
+/// Tests of `StepCompareFolder`
+///===----------------------------------------------===//
+
+
+///===------------------------------------===//
+/// Tests of `ugt` (unsigned greater than)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ugt_constant_3_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_3_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 3 > [0, 1, 2] => [true, true, true] => true for all indices => fold
+ %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ugt_constant_2_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ugt_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 2 > [0, 1, 2] => [true, true, false] => not same for all indices => don't fold
+ %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_3_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_3_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] > 3 => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_max_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_max_rhs() -> vector<3xi1> {
+ // The largest i64 possible:
+ %cst = arith.constant dense<0x7fffffffffffffff> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ %1 = arith.cmpi ugt, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_2_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] > 2 => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ugt_constant_1_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ugt_constant_1_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] > 1 => [false, false, true] => not same for all indices => don't fold
+ %1 = arith.cmpi ugt, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `uge` (unsigned greater than or equal)
+///===------------------------------------===//
+
+
+// CHECK-LABEL: @uge_constant_2_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @uge_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 2 >= [0, 1, 2] => [true, true, true] => true for all indices => fold
+ %1 = arith.cmpi uge, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_uge_constant_1_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_uge_constant_1_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 1 >= [0, 1, 2] => [true, false, false] => not same for all indices => don't fold
+ %1 = arith.cmpi uge, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @uge_constant_3_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @uge_constant_3_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] >= 3 => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi uge, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_uge_constant_2_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_uge_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] >= 2 => [false, false, true] => not same for all indices => don't fold
+ %1 = arith.cmpi uge, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+
+///===------------------------------------===//
+/// Tests of `ult` (unsigned less than)
+///===------------------------------------===//
+
+
+// CHECK-LABEL: @ult_constant_2_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ult_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 2 < [0, 1, 2] => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi ult, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ult_constant_1_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ult_constant_1_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 1 < [0, 1, 2] => [false, false, true] => not same for all indices => don't fold
+ %1 = arith.cmpi ult, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ult_constant_3_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ult_constant_3_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] < 3 => [true, true, true] => true for all indices => fold
+ %1 = arith.cmpi ult, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ult_constant_2_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ult_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] < 2 => [true, true, false] => not same for all indices => don't fold
+ %1 = arith.cmpi ult, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `ule` (unsigned less than or equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ule_constant_3_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ule_constant_3_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ %1 = arith.cmpi ule, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ule_constant_2_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ule_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ %1 = arith.cmpi ule, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ule_constant_2_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ule_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ %1 = arith.cmpi ule, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ule_constant_1_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ule_constant_1_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ %1 = arith.cmpi ule, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `eq` (equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @eq_constant_3
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @eq_constant_3() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ %1 = arith.cmpi eq, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_eq_constant_2
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_eq_constant_2() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ %1 = arith.cmpi eq, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `ne` (not equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ne_constant_3
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ne_constant_3() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ %1 = arith.cmpi ne, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ne_constant_2
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ne_constant_2() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ %1 = arith.cmpi ne, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
diff --git a/mlir/test/Dialect/Vector/vector-unroll-options.mlir b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
index 35db14e..e5a98b5 100644
--- a/mlir/test/Dialect/Vector/vector-unroll-options.mlir
+++ b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
@@ -188,15 +188,38 @@ func.func @vector_fma(%a: vector<4x4xf32>, %b: vector<4x4xf32>, %c: vector<4x4xf
// CHECK-LABEL: func @vector_fma
// CHECK-COUNT-4: vector.fma %{{.+}}, %{{.+}}, %{{.+}} : vector<2x2xf32>
-// TODO: We should be able to unroll this like the example above - this will require extending UnrollElementwisePattern.
-func.func @negative_vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{
+func.func @vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{
%0 = vector.fma %a, %a, %a : vector<3x2x2xf32>
return %0 : vector<3x2x2xf32>
}
-// CHECK-LABEL: func @negative_vector_fma_3d
-// CHECK-NOT: vector.extract_strided_slice
-// CHECK: %[[R0:.*]] = vector.fma %{{.+}} : vector<3x2x2xf32>
-// CHECK: return
+// CHECK-LABEL: func @vector_fma_3d
+// CHECK-SAME: (%[[SRC:.*]]: vector<3x2x2xf32>) -> vector<3x2x2xf32> {
+// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<3x2x2xf32>
+// CHECK: %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_OUT_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_OUT_0:.*]] = vector.shape_cast %[[E_OUT_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[FMA0:.*]] = vector.fma %[[S_LHS_0]], %[[S_RHS_0]], %[[S_OUT_0]] : vector<2x2xf32>
+// CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[FMA0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+// CHECK: %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_OUT_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_OUT_1:.*]] = vector.shape_cast %[[E_OUT_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[FMA1:.*]] = vector.fma %[[S_LHS_1]], %[[S_RHS_1]], %[[S_OUT_1]] : vector<2x2xf32>
+// CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[FMA1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+// CHECK: %[[E_LHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_2:.*]] = vector.shape_cast %[[E_LHS_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_2:.*]] = vector.shape_cast %[[E_RHS_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_OUT_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_OUT_2:.*]] = vector.shape_cast %[[E_OUT_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[FMA2:.*]] = vector.fma %[[S_LHS_2]], %[[S_RHS_2]], %[[S_OUT_2]] : vector<2x2xf32>
+// CHECK: %[[I2:.*]] = vector.insert_strided_slice %[[FMA2]], %[[I1]] {offsets = [2, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+// CHECK: return %[[I2]] : vector<3x2x2xf32>
func.func @vector_multi_reduction(%v : vector<4x6xf32>, %acc: vector<4xf32>) -> vector<4xf32> {
%0 = vector.multi_reduction #vector.kind<add>, %v, %acc [1] : vector<4x6xf32> to vector<4xf32>
@@ -440,3 +463,36 @@ func.func @vector_step() -> vector<32xindex> {
// CHECK: %[[ADD3:.*]] = arith.addi %[[STEP]], %[[CST]] : vector<8xindex>
// CHECK: %[[INS3:.*]] = vector.insert_strided_slice %[[ADD3]], %[[INS2]] {offsets = [24], strides = [1]} : vector<8xindex> into vector<32xindex>
// CHECK: return %[[INS3]] : vector<32xindex>
+
+
+func.func @elementwise_3D_to_2D(%v1: vector<2x2x2xf32>, %v2: vector<2x2x2xf32>) -> vector<2x2x2xf32> {
+ %0 = arith.addf %v1, %v2 : vector<2x2x2xf32>
+ return %0 : vector<2x2x2xf32>
+}
+// CHECK-LABEL: func @elementwise_3D_to_2D
+// CHECK-SAME: (%[[ARG0:.*]]: vector<2x2x2xf32>, %[[ARG1:.*]]: vector<2x2x2xf32>) -> vector<2x2x2xf32> {
+// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<2x2x2xf32>
+// CHECK: %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[ADD0:.*]] = arith.addf %[[S_LHS_0]], %[[S_RHS_0]] : vector<2x2xf32>
+// CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[ADD0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32>
+// CHECK: %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[ADD1:.*]] = arith.addf %[[S_LHS_1]], %[[S_RHS_1]] : vector<2x2xf32>
+// CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[ADD1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32>
+// CHECK: return %[[I1]] : vector<2x2x2xf32>
+
+
+func.func @elementwise_4D_to_2D(%v1: vector<2x2x2x2xf32>, %v2: vector<2x2x2x2xf32>) -> vector<2x2x2x2xf32> {
+ %0 = arith.addf %v1, %v2 : vector<2x2x2x2xf32>
+ return %0 : vector<2x2x2x2xf32>
+}
+
+// CHECK-LABEL: func @elementwise_4D_to_2D
+// CHECK-COUNT-4: arith.addf %{{.*}}, %{{.*}} : vector<2x2xf32>
+// CHECK-NOT: arith.addf
+// CHECK: return
diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
index bb76392..401cdd29 100644
--- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
+++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
@@ -1925,3 +1925,22 @@ func.func @warp_scf_if_distribute(%pred : i1) {
// CHECK-PROP: "some_use"(%[[IF_YIELD_DIST]]) : (vector<1xf32>) -> ()
// CHECK-PROP: return
// CHECK-PROP: }
+
+// -----
+func.func @dedup_unused_result(%laneid : index) -> (vector<1xf32>) {
+ %r:3 = gpu.warp_execute_on_lane_0(%laneid)[32] ->
+ (vector<1xf32>, vector<2xf32>, vector<1xf32>) {
+ %2 = "some_def"() : () -> (vector<32xf32>)
+ %3 = "some_def"() : () -> (vector<64xf32>)
+ gpu.yield %2, %3, %2 : vector<32xf32>, vector<64xf32>, vector<32xf32>
+ }
+ %r0 = "some_use"(%r#2, %r#2) : (vector<1xf32>, vector<1xf32>) -> (vector<1xf32>)
+ return %r0 : vector<1xf32>
+}
+
+// CHECK-PROP: func @dedup_unused_result
+// CHECK-PROP: %[[R:.*]] = gpu.warp_execute_on_lane_0(%arg0)[32] -> (vector<1xf32>)
+// CHECK-PROP: %[[Y0:.*]] = "some_def"() : () -> vector<32xf32>
+// CHECK-PROP: %[[Y1:.*]] = "some_def"() : () -> vector<64xf32>
+// CHECK-PROP: gpu.yield %[[Y0]] : vector<32xf32>
+// CHECK-PROP: "some_use"(%[[R]], %[[R]]) : (vector<1xf32>, vector<1xf32>) -> vector<1xf32>
diff --git a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir
index cb16c37..b3154d4 100644
--- a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir
+++ b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir
@@ -3,7 +3,16 @@
// RUN: | FileCheck %s
// CHECK: data =
-// CHECK-RAW: [[[7.7, 0, 0], [7.7, 0, 0], [7.7, 0, 0]], [[0, 7.7, 0], [0, 7.7, 0], [0, 7.7, 0]], [[0, 0, 7.7], [0, 0, 7.7], [0, 0, 7.7]]]
+// CHECK{LITERAL}: [[[7.7, 0, 0],
+// CHECK{LITERAL}: [7.7, 0, 0],
+// CHECK{LITERAL}: [7.7, 0, 0]],
+// CHECK{LITERAL}: [[0, 7.7, 0],
+// CHECK{LITERAL}: [0, 7.7, 0],
+// CHECK{LITERAL}: [0, 7.7, 0]],
+// CHECK{LITERAL}: [[0, 0, 7.7],
+// CHECK{LITERAL}: [0, 0, 7.7],
+// CHECK{LITERAL}: [0, 0, 7.7]]]
+
module attributes {
gpu.container_module,
spirv.target_env = #spirv.target_env<
diff --git a/mlir/test/Pass/remark-final.mlir b/mlir/test/Pass/remark-final.mlir
new file mode 100644
index 0000000..325271e
--- /dev/null
+++ b/mlir/test/Pass/remark-final.mlir
@@ -0,0 +1,17 @@
+// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final 2>&1 | FileCheck %s
+// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final --remark-format=yaml --remarks-output-file=%t.yaml
+// RUN: FileCheck --check-prefix=CHECK-YAML %s < %t.yaml
+module @foo {
+ "test.op"() : () -> ()
+
+}
+
+// CHECK-YAML-NOT: This is a test passed remark (should be dropped)
+// CHECK-YAML-DAG: !Analysis
+// CHECK-YAML-DAG: !Failure
+// CHECK-YAML-DAG: !Passed
+
+// CHECK-NOT: This is a test passed remark (should be dropped)
+// CHECK-DAG: remark: [Analysis] test-remark
+// CHECK-DAG: remark: [Failure] test-remark | Category:category-2-failed
+// CHECK-DAG: remark: [Passed] test-remark | Category:category-1-passed
diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll
index e056e43..61376b8 100644
--- a/mlir/test/Target/LLVMIR/Import/debug-info.ll
+++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll
@@ -240,11 +240,10 @@ define void @subprogram() !dbg !3 {
define void @func_loc() !dbg !3 {
ret void
}
-; CHECK-DAG: #[[NAME_LOC:.+]] = loc("func_loc")
; CHECK-DAG: #[[FILE_LOC:.+]] = loc("debug-info.ll":42:0)
; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram<id = distinct[{{.*}}]<>, compileUnit = #{{.*}}, scope = #{{.*}}, name = "func_loc", file = #{{.*}}, line = 42, subprogramFlags = Definition>
-; CHECK: loc(fused<#[[SP]]>[#[[NAME_LOC]], #[[FILE_LOC]]]
+; CHECK: loc(fused<#[[SP]]>[#[[FILE_LOC]]]
!llvm.dbg.cu = !{!1}
!llvm.module.flags = !{!0}
diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir
new file mode 100644
index 0000000..04e2ddf
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir
@@ -0,0 +1,12 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-LABEL: @convert_f32x2_to_f4x2_e2m1
+llvm.func @convert_f32x2_to_f4x2_e2m1(%srcA : f32, %srcB : f32) {
+ // CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.satfinite(float %{{.*}}, float %{{.*}})
+ // CHECK-NEXT: %{{.*}} = trunc i16 %[[res1]] to i8
+ %res1 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB : i8 (f4E2M1FN)
+ // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}})
+ // CHECK-NEXT: %{{.*}} = trunc i16 %[[res2]] to i8
+ %res2 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB {relu = true} : i8 (f4E2M1FN)
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index 0b36154..b2fe2f5 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -254,6 +254,14 @@ llvm.func @nvvm_cvt_f32x2_to_f6x2_invalid_type(%a : f32, %b : f32) {
// -----
+llvm.func @nvvm_cvt_f32x2_to_f4x2_invalid_type(%a : f32, %b : f32) {
+ // expected-error @below {{Only 'f4E2M1FN' type is supported for conversions from f32x2 to f4x2.}}
+ %res = nvvm.convert.f32x2.to.f4x2 %a, %b : i8 (f8E4M3FN)
+ llvm.return
+}
+
+// -----
+
llvm.func @nvvm_prefetch_L1_with_evict_priority(%global_ptr: !llvm.ptr<1>) {
// expected-error @below {{cache eviction priority supported only for cache level L2}}
nvvm.prefetch level = L1, evict_priority = evict_last, %global_ptr : !llvm.ptr<1>
@@ -559,3 +567,13 @@ llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_invalid_return_typ
%res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i1
llvm.return
}
+
+
+// -----
+
+// Test for range validation - invalid range where lower == upper but not at extremes
+func.func @invalid_range_equal_bounds() {
+ // expected-error @below {{invalid range attribute: Lower == Upper, but they aren't min (0) or max (4294967295) value! This is an invalid constant range.}}
+ %0 = nvvm.read.ptx.sreg.warpsize range <i32, 32, 32> : i32
+ return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 00a479d..594ae48 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -152,6 +152,10 @@ llvm.func @nvvm_special_regs() -> i32 {
%74 = nvvm.read.ptx.sreg.lanemask.ge : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt
%75 = nvvm.read.ptx.sreg.lanemask.gt : i32
+ // CHECK: %76 = call range(i32 0, 0) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %76 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 0> : i32
+ // CHECK: %77 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %77 = nvvm.read.ptx.sreg.tid.x range <i32, 4294967295, 4294967295> : i32
llvm.return %1 : i32
}
diff --git a/mlir/test/lib/Analysis/CMakeLists.txt b/mlir/test/lib/Analysis/CMakeLists.txt
index 9187998..c37671a 100644
--- a/mlir/test/lib/Analysis/CMakeLists.txt
+++ b/mlir/test/lib/Analysis/CMakeLists.txt
@@ -17,6 +17,7 @@ add_mlir_library(MLIRTestAnalysis
DataFlow/TestDenseForwardDataFlowAnalysis.cpp
DataFlow/TestLivenessAnalysis.cpp
DataFlow/TestSparseBackwardDataFlowAnalysis.cpp
+ DataFlow/TestStridedMetadataRangeAnalysis.cpp
EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp
new file mode 100644
index 0000000..6ac09fd
--- /dev/null
+++ b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp
@@ -0,0 +1,86 @@
+//===- TestStridedMetadataRangeAnalysis.cpp - Test strided md analysis ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
+#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
+#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
+#include "mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h"
+#include "mlir/Analysis/DataFlowFramework.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace mlir;
+using namespace mlir::dataflow;
+
+static void printAnalysisResults(DataFlowSolver &solver, Operation *op,
+ raw_ostream &os) {
+ // Collect the strided metadata of the op results.
+ SmallVector<std::pair<unsigned, const StridedMetadataRangeLattice *>> results;
+ for (OpResult result : op->getResults()) {
+ const auto *state = solver.lookupState<StridedMetadataRangeLattice>(result);
+ // Skip the result if it's uninitialized.
+ if (!state || state->getValue().isUninitialized())
+ continue;
+
+ // Skip the result if the range is empty.
+ const mlir::StridedMetadataRange &md = state->getValue();
+ if (md.getOffsets().empty() && md.getSizes().empty() &&
+ md.getStrides().empty())
+ continue;
+ results.push_back({result.getResultNumber(), state});
+ }
+
+ // Early exit if there's no metadata to print.
+ if (results.empty())
+ return;
+
+ // Print the metadata.
+ os << "Op: " << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "\n";
+ for (auto [idx, state] : results)
+ os << " result[" << idx << "]: " << state->getValue() << "\n";
+ os << "\n";
+}
+
+namespace {
+struct TestStridedMetadataRangeAnalysisPass
+ : public PassWrapper<TestStridedMetadataRangeAnalysisPass,
+ OperationPass<>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
+ TestStridedMetadataRangeAnalysisPass)
+
+ StringRef getArgument() const override {
+ return "test-strided-metadata-range-analysis";
+ }
+ void runOnOperation() override {
+ Operation *op = getOperation();
+
+ DataFlowSolver solver;
+ solver.load<DeadCodeAnalysis>();
+ solver.load<SparseConstantPropagation>();
+ solver.load<IntegerRangeAnalysis>();
+ solver.load<StridedMetadataRangeAnalysis>();
+ if (failed(solver.initializeAndRun(op)))
+ return signalPassFailure();
+
+ op->walk(
+ [&](Operation *op) { printAnalysisResults(solver, op, llvm::errs()); });
+ }
+};
+} // end anonymous namespace
+
+namespace mlir {
+namespace test {
+void registerTestStridedMetadataRangeAnalysisPass() {
+ PassRegistration<TestStridedMetadataRangeAnalysisPass>();
+}
+} // end namespace test
+} // end namespace mlir
diff --git a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
index 1e2d4a7..4069a74 100644
--- a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
+++ b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
@@ -11,11 +11,25 @@
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/Transforms.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
+#include "TestAttributes.h" // TestTensorEncodingAttr, TestMemRefLayoutAttr
+#include "TestDialect.h"
+
using namespace mlir;
namespace {
+MemRefLayoutAttrInterface
+getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) {
+ if (auto encoding = dyn_cast_if_present<test::TestTensorEncodingAttr>(
+ tensorType.getEncoding())) {
+ return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get(
+ tensorType.getContext(), encoding.getDummy()));
+ }
+ return {};
+}
+
struct TestOneShotModuleBufferizePass
: public PassWrapper<TestOneShotModuleBufferizePass, OperationPass<>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestOneShotModuleBufferizePass)
@@ -25,6 +39,7 @@ struct TestOneShotModuleBufferizePass
: PassWrapper(pass) {}
void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert<test::TestDialect>();
registry.insert<bufferization::BufferizationDialect>();
}
StringRef getArgument() const final {
@@ -41,6 +56,17 @@ struct TestOneShotModuleBufferizePass
bufferization::OneShotBufferizationOptions opt;
opt.bufferizeFunctionBoundaries = true;
+ opt.functionArgTypeConverterFn =
+ [&](bufferization::TensorLikeType tensor, Attribute memSpace,
+ func::FuncOp, const bufferization::BufferizationOptions &) {
+ assert(isa<RankedTensorType>(tensor) && "tests only builtin tensors");
+ auto tensorType = cast<RankedTensorType>(tensor);
+ auto layout = getMemRefLayoutForTensorEncoding(tensorType);
+ return cast<bufferization::BufferLikeType>(
+ MemRefType::get(tensorType.getShape(),
+ tensorType.getElementType(), layout, memSpace));
+ };
+
bufferization::BufferizationState bufferizationState;
if (failed(bufferization::runOneShotModuleBufferize(getOperation(), opt,
diff --git a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
index f84055d..1e59338 100644
--- a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
@@ -1,6 +1,7 @@
add_mlir_library(MLIROpenACCTestPasses
TestOpenACC.cpp
TestPointerLikeTypeInterface.cpp
+ TestRecipePopulate.cpp
EXCLUDE_FROM_LIBMLIR
)
diff --git a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
index 9886240..bea21b9 100644
--- a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
+++ b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
@@ -15,9 +15,13 @@ namespace test {
// Forward declarations of individual test pass registration functions
void registerTestPointerLikeTypeInterfacePass();
+void registerTestRecipePopulatePass();
// Unified registration function for all OpenACC tests
-void registerTestOpenACC() { registerTestPointerLikeTypeInterfacePass(); }
+void registerTestOpenACC() {
+ registerTestPointerLikeTypeInterfacePass();
+ registerTestRecipePopulatePass();
+}
} // namespace test
} // namespace mlir
diff --git a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
index 85f9283..027b0a1 100644
--- a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
+++ b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
@@ -196,13 +196,15 @@ void TestPointerLikeTypeInterfacePass::testGenAllocate(
newBuilder.setInsertionPointAfter(op);
// Call the genAllocate API
+ bool needsFree = false;
Value allocRes = pointerType.genAllocate(newBuilder, loc, "test_alloc",
- result.getType(), result);
+ result.getType(), result, needsFree);
if (allocRes) {
llvm::errs() << "Successfully generated alloc for operation: ";
op->print(llvm::errs());
llvm::errs() << "\n";
+ llvm::errs() << "\tneeds free: " << (needsFree ? "true" : "false") << "\n";
// Print all operations that were inserted
for (Operation *insertedOp : tracker.insertedOps) {
@@ -230,8 +232,8 @@ void TestPointerLikeTypeInterfacePass::testGenFree(Operation *op, Value result,
// Call the genFree API
auto typedResult = cast<TypedValue<PointerLikeType>>(result);
- bool success =
- pointerType.genFree(newBuilder, loc, typedResult, result.getType());
+ bool success = pointerType.genFree(newBuilder, loc, typedResult, result,
+ result.getType());
if (success) {
llvm::errs() << "Successfully generated free for operation: ";
diff --git a/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp
new file mode 100644
index 0000000..35f092c
--- /dev/null
+++ b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp
@@ -0,0 +1,110 @@
+//===- TestRecipePopulate.cpp - Test Recipe Population -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains test passes for testing the createAndPopulate methods
+// of the recipe operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace mlir;
+using namespace mlir::acc;
+
+namespace {
+
+struct TestRecipePopulatePass
+ : public PassWrapper<TestRecipePopulatePass, OperationPass<ModuleOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestRecipePopulatePass)
+
+ TestRecipePopulatePass() = default;
+ TestRecipePopulatePass(const TestRecipePopulatePass &pass)
+ : PassWrapper(pass) {
+ recipeType = pass.recipeType;
+ }
+
+ Pass::Option<std::string> recipeType{
+ *this, "recipe-type",
+ llvm::cl::desc("Recipe type: private or firstprivate"),
+ llvm::cl::init("private")};
+
+ StringRef getArgument() const override { return "test-acc-recipe-populate"; }
+
+ StringRef getDescription() const override {
+ return "Test OpenACC recipe population";
+ }
+
+ void runOnOperation() override;
+
+ void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert<acc::OpenACCDialect>();
+ registry.insert<arith::ArithDialect>();
+ registry.insert<memref::MemRefDialect>();
+ }
+};
+
+void TestRecipePopulatePass::runOnOperation() {
+ auto module = getOperation();
+ OpBuilder builder(&getContext());
+
+ // Collect all test variables
+ SmallVector<std::tuple<Operation *, Value, std::string>> testVars;
+
+ module.walk([&](Operation *op) {
+ if (auto varName = op->getAttrOfType<StringAttr>("test.var")) {
+ for (auto result : op->getResults()) {
+ testVars.push_back({op, result, varName.str()});
+ }
+ }
+ });
+
+ // Generate recipes at module level
+ builder.setInsertionPoint(&module.getBodyRegion().front(),
+ module.getBodyRegion().front().begin());
+
+ for (auto [op, var, varName] : testVars) {
+ Location loc = op->getLoc();
+
+ std::string recipeName = recipeType.getValue() + "_" + varName;
+ ValueRange bounds; // No bounds for memref tests
+
+ if (recipeType == "private") {
+ auto recipe = PrivateRecipeOp::createAndPopulate(
+ builder, loc, recipeName, var.getType(), varName, bounds);
+
+ if (!recipe) {
+ op->emitError("Failed to create private recipe for ") << varName;
+ }
+ } else if (recipeType == "firstprivate") {
+ auto recipe = FirstprivateRecipeOp::createAndPopulate(
+ builder, loc, recipeName, var.getType(), varName, bounds);
+
+ if (!recipe) {
+ op->emitError("Failed to create firstprivate recipe for ") << varName;
+ }
+ }
+ }
+}
+
+} // namespace
+
+namespace mlir {
+namespace test {
+
+void registerTestRecipePopulatePass() {
+ PassRegistration<TestRecipePopulatePass>();
+}
+
+} // namespace test
+} // namespace mlir
diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td
index 5685004..9e7e4f8 100644
--- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td
+++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td
@@ -22,6 +22,7 @@ include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/BuiltinAttributeInterfaces.td"
include "mlir/IR/EnumAttr.td"
include "mlir/IR/OpAsmInterface.td"
+include "mlir/IR/TensorEncoding.td"
// All of the attributes will extend this class.
class Test_Attr<string name, list<Trait> traits = []>
@@ -439,4 +440,20 @@ def TestCustomStorageCtorAttr : Test_Attr<"TestCustomStorageCtorAttr"> {
let hasStorageCustomConstructor = 1;
}
+def TestTensorEncodingAttr : Test_Attr<"TestTensorEncoding",
+ [DeclareAttrInterfaceMethods<VerifiableTensorEncoding>]> {
+ let mnemonic = "tensor_encoding";
+
+ let parameters = (ins "mlir::StringAttr":$dummy);
+ let assemblyFormat = "`<` $dummy `>`";
+}
+
+def TestMemRefLayoutAttr : Test_Attr<"TestMemRefLayout",
+ [DeclareAttrInterfaceMethods<MemRefLayoutAttrInterface>]> {
+ let mnemonic = "memref_layout";
+
+ let parameters = (ins "mlir::StringAttr":$dummy);
+ let assemblyFormat = "`<` $dummy `>`";
+}
+
#endif // TEST_ATTRDEFS
diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp
index fe1e916..9db7b01 100644
--- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp
+++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp
@@ -542,6 +542,24 @@ test::detail::TestCustomStorageCtorAttrAttrStorage::construct(
}
//===----------------------------------------------------------------------===//
+// TestTensorEncodingAttr
+//===----------------------------------------------------------------------===//
+
+::llvm::LogicalResult TestTensorEncodingAttr::verifyEncoding(
+ mlir::ArrayRef<int64_t> shape, mlir::Type elementType,
+ llvm::function_ref<::mlir::InFlightDiagnostic()> emitError) const {
+ return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// TestMemRefLayoutAttr
+//===----------------------------------------------------------------------===//
+
+mlir::AffineMap TestMemRefLayoutAttr::getAffineMap() const {
+ return mlir::AffineMap::getMultiDimIdentityMap(1, getContext());
+}
+
+//===----------------------------------------------------------------------===//
// TestDialect
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.h b/mlir/test/lib/Dialect/Test/TestAttributes.h
index 778d84fa..0ad5ab6 100644
--- a/mlir/test/lib/Dialect/Test/TestAttributes.h
+++ b/mlir/test/lib/Dialect/Test/TestAttributes.h
@@ -24,6 +24,7 @@
#include "mlir/IR/Dialect.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/DialectResourceBlobManager.h"
+#include "mlir/IR/TensorEncoding.h"
// generated files require above includes to come first
#include "TestAttrInterfaces.h.inc"
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.h b/mlir/test/lib/Dialect/Test/TestDialect.h
index f2adca6..bcf3b55d 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.h
+++ b/mlir/test/lib/Dialect/Test/TestDialect.h
@@ -18,6 +18,7 @@
#include "TestInterfaces.h"
#include "TestTypes.h"
#include "mlir/Bytecode/BytecodeImplementation.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/DLTI/Traits.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.td b/mlir/test/lib/Dialect/Test/TestDialect.td
index 2b5491f..37a263f 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.td
+++ b/mlir/test/lib/Dialect/Test/TestDialect.td
@@ -24,7 +24,10 @@ def Test_Dialect : Dialect {
let useDefaultTypePrinterParser = 0;
let useDefaultAttributePrinterParser = 1;
let isExtensible = 1;
- let dependentDialects = ["::mlir::DLTIDialect"];
+ let dependentDialects = [
+ "::mlir::DLTIDialect",
+ "::mlir::bufferization::BufferizationDialect"
+ ];
let discardableAttrs = (ins
"mlir::IntegerAttr":$discardable_attr_key,
"SimpleAAttr":$other_discardable_attr_key
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index 53055fe..b211e24 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1425,6 +1425,39 @@ TestMultiSlotAlloca::handleDestructuringComplete(
return createNewMultiAllocaWithoutSlot(slot, builder, *this);
}
+namespace {
+/// Returns test dialect's memref layout for test dialect's tensor encoding when
+/// applicable.
+MemRefLayoutAttrInterface
+getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) {
+ if (auto encoding =
+ dyn_cast<test::TestTensorEncodingAttr>(tensorType.getEncoding())) {
+ return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get(
+ tensorType.getContext(), encoding.getDummy()));
+ }
+ return {};
+}
+
+/// Auxiliary bufferization function for test and builtin tensors.
+bufferization::BufferLikeType
+convertTensorToBuffer(mlir::Operation *op,
+ const bufferization::BufferizationOptions &options,
+ bufferization::TensorLikeType tensorLike) {
+ auto buffer =
+ *tensorLike.getBufferType(options, [&]() { return op->emitError(); });
+ if (auto memref = dyn_cast<MemRefType>(buffer)) {
+ // Note: For the sake of testing, we want to ensure that encoding -> layout
+ // bufferization happens. This is currently achieved manually.
+ auto layout =
+ getMemRefLayoutForTensorEncoding(cast<RankedTensorType>(tensorLike));
+ return cast<bufferization::BufferLikeType>(
+ MemRefType::get(memref.getShape(), memref.getElementType(), layout,
+ memref.getMemorySpace()));
+ }
+ return buffer;
+}
+} // namespace
+
::mlir::LogicalResult test::TestDummyTensorOp::bufferize(
::mlir::RewriterBase &rewriter,
const ::mlir::bufferization::BufferizationOptions &options,
@@ -1435,8 +1468,8 @@ TestMultiSlotAlloca::handleDestructuringComplete(
return failure();
const auto outType = getOutput().getType();
- const auto bufferizedOutType = test::TestMemrefType::get(
- getContext(), outType.getShape(), outType.getElementType(), nullptr);
+ const auto bufferizedOutType =
+ convertTensorToBuffer(getOperation(), options, outType);
// replace op with memref analogy
auto dummyMemrefOp = test::TestDummyMemrefOp::create(
rewriter, getLoc(), bufferizedOutType, *buffer);
@@ -1470,13 +1503,12 @@ TestMultiSlotAlloca::handleDestructuringComplete(
mlir::FailureOr<mlir::bufferization::BufferLikeType>
test::TestCreateTensorOp::getBufferType(
- mlir::Value value, const mlir::bufferization::BufferizationOptions &,
+ mlir::Value value, const mlir::bufferization::BufferizationOptions &options,
const mlir::bufferization::BufferizationState &,
llvm::SmallVector<::mlir::Value> &) {
- const auto type = dyn_cast<test::TestTensorType>(value.getType());
+ const auto type = dyn_cast<bufferization::TensorLikeType>(value.getType());
if (type == nullptr)
return failure();
- return cast<mlir::bufferization::BufferLikeType>(test::TestMemrefType::get(
- getContext(), type.getShape(), type.getElementType(), nullptr));
+ return convertTensorToBuffer(getOperation(), options, type);
}
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 6329d61..05a33cf 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -32,6 +32,7 @@ include "mlir/Interfaces/MemorySlotInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ValueBoundsOpInterface.td"
include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td"
+include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td"
// Include the attribute definitions.
include "TestAttrDefs.td"
@@ -2335,7 +2336,7 @@ def SideEffectWithRegionOp : TEST_Op<"side_effect_with_region_op",
}
//===----------------------------------------------------------------------===//
-// Copy Operation Test
+// Copy Operation Test
//===----------------------------------------------------------------------===//
def CopyOp : TEST_Op<"copy", []> {
@@ -3676,10 +3677,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op",
["bufferize", "bufferizesToMemoryRead",
"bufferizesToMemoryWrite", "getAliasingValues"]>]> {
let arguments = (ins
- Arg<TestTensorType>:$input
+ Arg<Bufferization_TensorLikeTypeInterface>:$input
);
let results = (outs
- Arg<TestTensorType>:$output
+ Arg<Bufferization_TensorLikeTypeInterface>:$output
);
let extraClassDefinition = [{
@@ -3701,10 +3702,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op",
def TestDummyMemrefOp : TEST_Op<"dummy_memref_op", []> {
let arguments = (ins
- Arg<TestMemrefType>:$input
+ Arg<Bufferization_BufferLikeTypeInterface>:$input
);
let results = (outs
- Arg<TestMemrefType>:$output
+ Arg<Bufferization_BufferLikeTypeInterface>:$output
);
}
@@ -3714,7 +3715,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op",
"bufferizesToMemoryWrite", "getAliasingValues",
"bufferizesToAllocation"]>]> {
let arguments = (ins);
- let results = (outs Arg<TestTensorType>:$output);
+ let results = (outs Arg<Bufferization_TensorLikeTypeInterface>:$output);
let extraClassDefinition = [{
bool test::TestCreateTensorOp::bufferizesToMemoryRead(::mlir::OpOperand&,
const ::mlir::bufferization::AnalysisState&) {
@@ -3738,7 +3739,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op",
def TestCreateMemrefOp : TEST_Op<"create_memref_op"> {
let arguments = (ins);
- let results = (outs Arg<TestMemrefType>:$output);
+ let results = (outs Arg<Bufferization_BufferLikeTypeInterface>:$output);
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Pass/TestRemarksPass.cpp b/mlir/test/lib/Pass/TestRemarksPass.cpp
index 3b25686..5ca2d1a 100644
--- a/mlir/test/lib/Pass/TestRemarksPass.cpp
+++ b/mlir/test/lib/Pass/TestRemarksPass.cpp
@@ -43,7 +43,12 @@ public:
<< remark::add("This is a test missed remark")
<< remark::reason("because we are testing the remark pipeline")
<< remark::suggest("try using the remark pipeline feature");
-
+ mlir::remark::passed(
+ loc,
+ remark::RemarkOpts::name("test-remark").category("category-1-passed"))
+ << remark::add("This is a test passed remark (should be dropped)")
+ << remark::reason("because we are testing the remark pipeline")
+ << remark::suggest("try using the remark pipeline feature");
mlir::remark::passed(
loc,
remark::RemarkOpts::name("test-remark").category("category-1-passed"))
diff --git a/mlir/test/mlir-tblgen/cpp-class-comments.td b/mlir/test/mlir-tblgen/cpp-class-comments.td
index a896888..9dcf975 100644
--- a/mlir/test/mlir-tblgen/cpp-class-comments.td
+++ b/mlir/test/mlir-tblgen/cpp-class-comments.td
@@ -96,17 +96,14 @@ def EncodingTrait : AttrInterface<"EncodingTrait"> {
}];
let methods = [
];
-// ATTR-INTERFACE: namespace mlir
-// ATTR-INTERFACE-NEXT: namespace a
-// ATTR-INTERFACE-NEXT: namespace traits
+// ATTR-INTERFACE: namespace mlir::a::traits {
// ATTR-INTERFACE-NEXT: /// Common trait for all layouts.
// ATTR-INTERFACE-NEXT: class EncodingTrait;
}
def SimpleEncodingTrait : AttrInterface<"SimpleEncodingTrait"> {
let cppNamespace = "a::traits";
-// ATTR-INTERFACE: namespace a {
-// ATTR-INTERFACE-NEXT: namespace traits {
+// ATTR-INTERFACE: namespace a::traits {
// ATTR-INTERFACE-NEXT: class SimpleEncodingTrait;
}
@@ -116,8 +113,7 @@ def SimpleOpInterface : OpInterface<"SimpleOpInterface"> {
Simple Op Interface description
}];
-// OP-INTERFACE: namespace a {
-// OP-INTERFACE-NEXT: namespace traits {
+// OP-INTERFACE: namespace a::traits {
// OP-INTERFACE-NEXT: /// Simple Op Interface description
// OP-INTERFACE-NEXT: class SimpleOpInterface;
}
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index 6432fae..8842180 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -151,6 +151,7 @@ void registerTestSliceAnalysisPass();
void registerTestSPIRVCPURunnerPipeline();
void registerTestSPIRVFuncSignatureConversion();
void registerTestSPIRVVectorUnrolling();
+void registerTestStridedMetadataRangeAnalysisPass();
void registerTestTensorCopyInsertionPass();
void registerTestTensorLikeAndBufferLikePass();
void registerTestTensorTransforms();
@@ -299,6 +300,7 @@ void registerTestPasses() {
mlir::test::registerTestSPIRVCPURunnerPipeline();
mlir::test::registerTestSPIRVFuncSignatureConversion();
mlir::test::registerTestSPIRVVectorUnrolling();
+ mlir::test::registerTestStridedMetadataRangeAnalysisPass();
mlir::test::registerTestTensorCopyInsertionPass();
mlir::test::registerTestTensorLikeAndBufferLikePass();
mlir::test::registerTestTensorTransforms();
diff --git a/mlir/tools/mlir-pdll/mlir-pdll.cpp b/mlir/tools/mlir-pdll/mlir-pdll.cpp
index f99dcdb..76122a0 100644
--- a/mlir/tools/mlir-pdll/mlir-pdll.cpp
+++ b/mlir/tools/mlir-pdll/mlir-pdll.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include <set>
using namespace mlir;
@@ -41,6 +42,7 @@ processBuffer(raw_ostream &os, std::unique_ptr<llvm::MemoryBuffer> chunkBuffer,
bool dumpODS, std::set<std::string> *includedFiles) {
llvm::SourceMgr sourceMgr;
sourceMgr.setIncludeDirs(includeDirs);
+ sourceMgr.setVirtualFileSystem(llvm::vfs::getRealFileSystem());
sourceMgr.AddNewSourceBuffer(std::move(chunkBuffer), SMLoc());
// If we are dumping ODS information, also enable documentation to ensure the
diff --git a/mlir/tools/mlir-tblgen/EnumsGen.cpp b/mlir/tools/mlir-tblgen/EnumsGen.cpp
index d55ad482..11bf9ce 100644
--- a/mlir/tools/mlir-tblgen/EnumsGen.cpp
+++ b/mlir/tools/mlir-tblgen/EnumsGen.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/CodeGenHelpers.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
@@ -701,11 +702,7 @@ static void emitEnumDecl(const Record &enumDef, raw_ostream &os) {
StringRef underlyingToSymFnName = enumInfo.getUnderlyingToSymbolFnName();
auto enumerants = enumInfo.getAllCases();
- SmallVector<StringRef, 2> namespaces;
- llvm::SplitString(cppNamespace, namespaces, "::");
-
- for (auto ns : namespaces)
- os << "namespace " << ns << " {\n";
+ llvm::NamespaceEmitter ns(os, cppNamespace);
// Emit the enum class definition
emitEnumClass(enumDef, enumName, underlyingType, description, enumerants, os);
@@ -766,8 +763,7 @@ public:
os << formatv(attrClassDecl, enumName, attrClassName, baseAttrClassName);
}
- for (auto ns : llvm::reverse(namespaces))
- os << "} // namespace " << ns << "\n";
+ ns.close();
// Generate a generic parser and printer for the enum.
std::string qualName =
@@ -790,13 +786,8 @@ static bool emitEnumDecls(const RecordKeeper &records, raw_ostream &os) {
static void emitEnumDef(const Record &enumDef, raw_ostream &os) {
EnumInfo enumInfo(enumDef);
- StringRef cppNamespace = enumInfo.getCppNamespace();
- SmallVector<StringRef, 2> namespaces;
- llvm::SplitString(cppNamespace, namespaces, "::");
-
- for (auto ns : namespaces)
- os << "namespace " << ns << " {\n";
+ llvm::NamespaceEmitter ns(os, enumInfo.getCppNamespace());
if (enumInfo.isBitEnum()) {
emitSymToStrFnForBitEnum(enumDef, os);
@@ -810,10 +801,6 @@ static void emitEnumDef(const Record &enumDef, raw_ostream &os) {
if (enumInfo.genSpecializedAttr())
emitSpecializedAttrDef(enumDef, os);
-
- for (auto ns : llvm::reverse(namespaces))
- os << "} // namespace " << ns << "\n";
- os << "\n";
}
static bool emitEnumDefs(const RecordKeeper &records, raw_ostream &os) {
diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
index 730b5b2..ab8d534 100644
--- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/CodeGenHelpers.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
@@ -342,11 +343,7 @@ void InterfaceGenerator::emitModelDecl(const Interface &interface) {
}
void InterfaceGenerator::emitModelMethodsDef(const Interface &interface) {
- llvm::SmallVector<StringRef, 2> namespaces;
- llvm::SplitString(interface.getCppNamespace(), namespaces, "::");
- for (StringRef ns : namespaces)
- os << "namespace " << ns << " {\n";
-
+ llvm::NamespaceEmitter ns(os, interface.getCppNamespace());
for (auto &method : interface.getMethods()) {
os << "template<typename " << valueTemplate << ">\n";
emitCPPType(method.getReturnType(), os);
@@ -442,18 +439,11 @@ void InterfaceGenerator::emitModelMethodsDef(const Interface &interface) {
method.isStatic() ? &ctx : &nonStaticMethodFmt);
os << "\n}\n";
}
-
- for (StringRef ns : llvm::reverse(namespaces))
- os << "} // namespace " << ns << "\n";
}
void InterfaceGenerator::emitInterfaceTraitDecl(const Interface &interface) {
- llvm::SmallVector<StringRef, 2> namespaces;
- llvm::SplitString(interface.getCppNamespace(), namespaces, "::");
- for (StringRef ns : namespaces)
- os << "namespace " << ns << " {\n";
-
- os << "namespace detail {\n";
+ auto cppNamespace = (interface.getCppNamespace() + "::detail").str();
+ llvm::NamespaceEmitter ns(os, cppNamespace);
StringRef interfaceName = interface.getName();
auto interfaceTraitsName = (interfaceName + "InterfaceTraits").str();
@@ -504,10 +494,6 @@ void InterfaceGenerator::emitInterfaceTraitDecl(const Interface &interface) {
os << tblgen::tgfmt(*extraTraitDecls, &traitMethodFmt) << "\n";
os << " };\n";
- os << "}// namespace detail\n";
-
- for (StringRef ns : llvm::reverse(namespaces))
- os << "} // namespace " << ns << "\n";
}
static void emitInterfaceDeclMethods(const Interface &interface,
@@ -533,10 +519,7 @@ static void emitInterfaceDeclMethods(const Interface &interface,
}
void InterfaceGenerator::forwardDeclareInterface(const Interface &interface) {
- llvm::SmallVector<StringRef, 2> namespaces;
- llvm::SplitString(interface.getCppNamespace(), namespaces, "::");
- for (StringRef ns : namespaces)
- os << "namespace " << ns << " {\n";
+ llvm::NamespaceEmitter ns(os, interface.getCppNamespace());
// Emit a forward declaration of the interface class so that it becomes usable
// in the signature of its methods.
@@ -545,16 +528,10 @@ void InterfaceGenerator::forwardDeclareInterface(const Interface &interface) {
StringRef interfaceName = interface.getName();
os << "class " << interfaceName << ";\n";
-
- for (StringRef ns : llvm::reverse(namespaces))
- os << "} // namespace " << ns << "\n";
}
void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) {
- llvm::SmallVector<StringRef, 2> namespaces;
- llvm::SplitString(interface.getCppNamespace(), namespaces, "::");
- for (StringRef ns : namespaces)
- os << "namespace " << ns << " {\n";
+ llvm::NamespaceEmitter ns(os, interface.getCppNamespace());
StringRef interfaceName = interface.getName();
auto interfaceTraitsName = (interfaceName + "InterfaceTraits").str();
@@ -631,9 +608,6 @@ void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) {
}
os << "};\n";
-
- for (StringRef ns : llvm::reverse(namespaces))
- os << "} // namespace " << ns << "\n";
}
bool InterfaceGenerator::emitInterfaceDecls() {
diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
index 3ead2f0..ca291b5 100644
--- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
+++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp
@@ -259,8 +259,8 @@ static void emitInterfaceDecl(const Availability &availability,
std::string interfaceTraitsName =
std::string(formatv("{0}Traits", interfaceName));
- StringRef cppNamespace = availability.getInterfaceClassNamespace();
- llvm::NamespaceEmitter nsEmitter(os, cppNamespace);
+ llvm::NamespaceEmitter nsEmitter(os,
+ availability.getInterfaceClassNamespace());
os << "class " << interfaceName << ";\n\n";
// Emit the traits struct containing the concept and model declarations.
@@ -418,15 +418,9 @@ static void emitAvailabilityQueryForBitEnum(const Record &enumDef,
static void emitEnumDecl(const Record &enumDef, raw_ostream &os) {
EnumInfo enumInfo(enumDef);
StringRef enumName = enumInfo.getEnumClassName();
- StringRef cppNamespace = enumInfo.getCppNamespace();
auto enumerants = enumInfo.getAllCases();
- llvm::SmallVector<StringRef, 2> namespaces;
- llvm::SplitString(cppNamespace, namespaces, "::");
-
- for (auto ns : namespaces)
- os << "namespace " << ns << " {\n";
-
+ llvm::NamespaceEmitter ns(os, enumInfo.getCppNamespace());
llvm::StringSet<> handledClasses;
// Place all availability specifications to their corresponding
@@ -441,9 +435,6 @@ static void emitEnumDecl(const Record &enumDef, raw_ostream &os) {
enumName);
handledClasses.insert(className);
}
-
- for (auto ns : llvm::reverse(namespaces))
- os << "} // namespace " << ns << "\n";
}
static bool emitEnumDecls(const RecordKeeper &records, raw_ostream &os) {
@@ -459,31 +450,19 @@ static bool emitEnumDecls(const RecordKeeper &records, raw_ostream &os) {
static void emitEnumDef(const Record &enumDef, raw_ostream &os) {
EnumInfo enumInfo(enumDef);
- StringRef cppNamespace = enumInfo.getCppNamespace();
-
- llvm::SmallVector<StringRef, 2> namespaces;
- llvm::SplitString(cppNamespace, namespaces, "::");
-
- for (auto ns : namespaces)
- os << "namespace " << ns << " {\n";
+ llvm::NamespaceEmitter ns(os, enumInfo.getCppNamespace());
- if (enumInfo.isBitEnum()) {
+ if (enumInfo.isBitEnum())
emitAvailabilityQueryForBitEnum(enumDef, os);
- } else {
+ else
emitAvailabilityQueryForIntEnum(enumDef, os);
- }
-
- for (auto ns : llvm::reverse(namespaces))
- os << "} // namespace " << ns << "\n";
- os << "\n";
}
static bool emitEnumDefs(const RecordKeeper &records, raw_ostream &os) {
llvm::emitSourceFileHeader("SPIR-V Enum Availability Definitions", os,
records);
- auto defs = records.getAllDerivedDefinitions("EnumInfo");
- for (const auto *def : defs)
+ for (const Record *def : records.getAllDerivedDefinitions("EnumInfo"))
emitEnumDef(*def, os);
return false;
diff --git a/mlir/unittests/IR/RemarkTest.cpp b/mlir/unittests/IR/RemarkTest.cpp
index bcbda90..09c576c 100644
--- a/mlir/unittests/IR/RemarkTest.cpp
+++ b/mlir/unittests/IR/RemarkTest.cpp
@@ -53,10 +53,12 @@ TEST(Remark, TestOutputOptimizationRemark) {
/*missed=*/categoryUnroll,
/*analysis=*/categoryRegister,
/*failed=*/categoryInliner};
-
+ std::unique_ptr<remark::RemarkEmittingPolicyAll> policy =
+ std::make_unique<remark::RemarkEmittingPolicyAll>();
LogicalResult isEnabled =
mlir::remark::enableOptimizationRemarksWithLLVMStreamer(
- context, yamlFile, llvm::remarks::Format::YAML, cats);
+ context, yamlFile, llvm::remarks::Format::YAML, std::move(policy),
+ cats);
ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine";
// PASS: something succeeded
@@ -202,9 +204,10 @@ TEST(Remark, TestOutputOptimizationRemarkDiagnostic) {
/*missed=*/categoryUnroll,
/*analysis=*/categoryRegister,
/*failed=*/categoryUnroll};
-
- LogicalResult isEnabled =
- remark::enableOptimizationRemarks(context, nullptr, cats, true);
+ std::unique_ptr<remark::RemarkEmittingPolicyAll> policy =
+ std::make_unique<remark::RemarkEmittingPolicyAll>();
+ LogicalResult isEnabled = remark::enableOptimizationRemarks(
+ context, nullptr, std::move(policy), cats, true);
ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine";
@@ -282,8 +285,11 @@ TEST(Remark, TestCustomOptimizationRemarkDiagnostic) {
/*analysis=*/std::nullopt,
/*failed=*/categoryLoopunroll};
+ std::unique_ptr<remark::RemarkEmittingPolicyAll> policy =
+ std::make_unique<remark::RemarkEmittingPolicyAll>();
LogicalResult isEnabled = remark::enableOptimizationRemarks(
- context, std::make_unique<MyCustomStreamer>(), cats, true);
+ context, std::make_unique<MyCustomStreamer>(), std::move(policy), cats,
+ true);
ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine";
// Remark 1: pass, category LoopUnroll
@@ -311,4 +317,66 @@ TEST(Remark, TestCustomOptimizationRemarkDiagnostic) {
EXPECT_NE(errOut.find(pass2Msg), std::string::npos); // printed
EXPECT_EQ(errOut.find(pass3Msg), std::string::npos); // filtered out
}
+
+TEST(Remark, TestRemarkFinal) {
+ testing::internal::CaptureStderr();
+ const auto *pass1Msg = "I failed";
+ const auto *pass2Msg = "I failed too";
+ const auto *pass3Msg = "I succeeded";
+ const auto *pass4Msg = "I succeeded too";
+
+ std::string categoryLoopunroll("LoopUnroll");
+
+ std::string seenMsg = "";
+
+ {
+ MLIRContext context;
+ Location loc = FileLineColLoc::get(&context, "test.cpp", 1, 5);
+ Location locOther = FileLineColLoc::get(&context, "test.cpp", 55, 5);
+
+ // Setup the remark engine
+ mlir::remark::RemarkCategories cats{/*all=*/"",
+ /*passed=*/categoryLoopunroll,
+ /*missed=*/categoryLoopunroll,
+ /*analysis=*/categoryLoopunroll,
+ /*failed=*/categoryLoopunroll};
+
+ std::unique_ptr<remark::RemarkEmittingPolicyFinal> policy =
+ std::make_unique<remark::RemarkEmittingPolicyFinal>();
+ LogicalResult isEnabled = remark::enableOptimizationRemarks(
+ context, std::make_unique<MyCustomStreamer>(), std::move(policy), cats,
+ true);
+ ASSERT_TRUE(succeeded(isEnabled)) << "Failed to enable remark engine";
+
+ // Remark 1: failure
+ remark::failed(
+ loc, remark::RemarkOpts::name("Unroller").category(categoryLoopunroll))
+ << pass1Msg;
+
+ // Remark 2: failure
+ remark::missed(
+ loc, remark::RemarkOpts::name("Unroller").category(categoryLoopunroll))
+ << remark::reason(pass2Msg);
+
+ // Remark 3: pass
+ remark::passed(
+ loc, remark::RemarkOpts::name("Unroller").category(categoryLoopunroll))
+ << pass3Msg;
+
+ // Remark 4: pass
+ remark::passed(
+ locOther,
+ remark::RemarkOpts::name("Unroller").category(categoryLoopunroll))
+ << pass4Msg;
+ }
+
+ llvm::errs().flush();
+ std::string errOut = ::testing::internal::GetCapturedStderr();
+
+ // Containment checks for messages.
+ EXPECT_EQ(errOut.find(pass1Msg), std::string::npos); // dropped
+ EXPECT_EQ(errOut.find(pass2Msg), std::string::npos); // dropped
+ EXPECT_NE(errOut.find(pass3Msg), std::string::npos); // shown
+ EXPECT_NE(errOut.find(pass4Msg), std::string::npos); // shown
+}
} // namespace
diff --git a/mlir/utils/generate-test-checks.py b/mlir/utils/generate-test-checks.py
index f80a181..3712a6b 100755
--- a/mlir/utils/generate-test-checks.py
+++ b/mlir/utils/generate-test-checks.py
@@ -31,13 +31,16 @@ import argparse
import os # Used to advertise this file's name ("autogenerated_note").
import re
import sys
+from collections import Counter
ADVERT_BEGIN = "// NOTE: Assertions have been autogenerated by "
ADVERT_END = """
-// The script is designed to make adding checks to
-// a test case fast, it is *not* designed to be authoritative
-// about what constitutes a good test! The CHECK should be
-// minimized and named to reflect the test intent.
+// This script is intended to make adding checks to a test case quick and easy.
+// It is *not* authoritative about what constitutes a good test. After using the
+// script, be sure to review and refine the generated checks. For example,
+// CHECK lines should be minimized and named to reflect the test’s intent.
+// For comprehensive guidelines, see:
+// * https://mlir.llvm.org/getting_started/TestingGuide/
"""
@@ -45,6 +48,9 @@ ADVERT_END = """
SSA_RE_STR = "[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*"
SSA_RE = re.compile(SSA_RE_STR)
+# Regex matching `dialect.op_name` (e.g. `vector.transfer_read`).
+SSA_OP_NAME_RE = re.compile(r"\b(?:\s=\s[a-z_]+)[.]([a-z_]+)\b")
+
# Regex matching the left-hand side of an assignment
SSA_RESULTS_STR = r'\s*(%' + SSA_RE_STR + r')(\s*,\s*(%' + SSA_RE_STR + r'))*\s*='
SSA_RESULTS_RE = re.compile(SSA_RESULTS_STR)
@@ -63,7 +69,12 @@ ATTR_DEF_RE = re.compile(ATTR_DEF_RE_STR)
class VariableNamer:
def __init__(self, variable_names):
self.scopes = []
+ # Counter for generic FileCHeck names, e.g. VAL_#N
self.name_counter = 0
+ # Counters for FileCheck names derived from Op names, e.g.
+ # TRANSFER_READ_#N (based on `vector.transfer_read`). Note, there's a
+ # dedicated counter for every Op type present in the input.
+ self.op_name_counter = Counter()
# Number of variable names to still generate in parent scope
self.generate_in_parent_scope_left = 0
@@ -77,17 +88,29 @@ class VariableNamer:
self.generate_in_parent_scope_left = n
# Generate a substitution name for the given ssa value name.
- def generate_name(self, source_variable_name, use_ssa_name):
+ def generate_name(self, source_variable_name, use_ssa_name, op_name=""):
# Compute variable name
- variable_name = self.variable_names.pop(0) if len(self.variable_names) > 0 else ''
- if variable_name == '':
+ variable_name = (
+ self.variable_names.pop(0) if len(self.variable_names) > 0 else ""
+ )
+ if variable_name == "":
# If `use_ssa_name` is set, use the MLIR SSA value name to generate
# a FileCHeck substation string. As FileCheck requires these
# strings to start with a character, skip MLIR variables starting
# with a digit (e.g. `%0`).
+ #
+ # The next fallback option is to use the op name, if the
+ # corresponding match succeeds.
+ #
+ # If neither worked, use a generic name: `VAL_#N`.
if use_ssa_name and source_variable_name[0].isalpha():
variable_name = source_variable_name.upper()
+ elif op_name != "":
+ variable_name = (
+ op_name.upper() + "_" + str(self.op_name_counter[op_name])
+ )
+ self.op_name_counter[op_name] += 1
else:
variable_name = "VAL_" + str(self.name_counter)
self.name_counter += 1
@@ -123,6 +146,7 @@ class VariableNamer:
def clear_names(self):
self.name_counter = 0
self.used_variable_names = set()
+ self.op_name_counter.clear()
class AttributeNamer:
@@ -170,8 +194,12 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re
# Process the rest that contained an SSA value name.
for chunk in line_chunks:
- m = SSA_RE.match(chunk)
- ssa_name = m.group(0) if m is not None else ''
+ ssa = SSA_RE.match(chunk)
+ op_name_with_dialect = SSA_OP_NAME_RE.search(chunk)
+ ssa_name = ssa.group(0) if ssa is not None else ""
+ op_name = (
+ op_name_with_dialect.group(1) if op_name_with_dialect is not None else ""
+ )
# Check if an existing variable exists for this name.
variable = None
@@ -185,7 +213,7 @@ def process_line(line_chunks, variable_namer, use_ssa_name=False, strict_name_re
output_line += "%[[" + variable + "]]"
else:
# Otherwise, generate a new variable.
- variable = variable_namer.generate_name(ssa_name, use_ssa_name)
+ variable = variable_namer.generate_name(ssa_name, use_ssa_name, op_name)
if strict_name_re:
# Use stricter regexp for the variable name, if requested.
# Greedy matching may cause issues with the generic '.*'