Diffstat (limited to 'mlir/lib')
-rw-r--r--  mlir/lib/Bindings/Python/Rewrite.cpp | 2
-rw-r--r--  mlir/lib/CAPI/Transforms/Rewrite.cpp | 2
-rw-r--r--  mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 2
-rw-r--r--  mlir/lib/Conversion/MathToROCDL/CMakeLists.txt | 1
-rw-r--r--  mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp | 76
-rw-r--r--  mlir/lib/Conversion/XeGPUToXeVM/CMakeLists.txt | 1
-rw-r--r--  mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp | 168
-rw-r--r--  mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 19
-rw-r--r--  mlir/lib/Dialect/Affine/IR/AffineOps.cpp | 171
-rw-r--r--  mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp | 70
-rw-r--r--  mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt | 6
-rw-r--r--  mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp | 139
-rw-r--r--  mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp | 16
-rw-r--r--  mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp | 140
-rw-r--r--  mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 36
-rw-r--r--  mlir/lib/Dialect/MemRef/Transforms/EmulateWideInt.cpp | 2
-rw-r--r--  mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 172
-rw-r--r--  mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp | 26
-rw-r--r--  mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp | 48
-rw-r--r--  mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp | 2
-rw-r--r--  mlir/lib/Dialect/WasmSSA/IR/WasmSSAOps.cpp | 141
-rw-r--r--  mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 146
-rw-r--r--  mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp | 115
-rw-r--r--  mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp | 7
-rw-r--r--  mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp | 3
-rw-r--r--  mlir/lib/RegisterAllPasses.cpp | 1
-rw-r--r--  mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 2
-rw-r--r--  mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp | 5
-rw-r--r--  mlir/lib/Target/Wasm/TranslateFromWasm.cpp | 459
29 files changed, 1498 insertions(+), 480 deletions(-)
diff --git a/mlir/lib/Bindings/Python/Rewrite.cpp b/mlir/lib/Bindings/Python/Rewrite.cpp
index 5ddb3fb..0f0ed22 100644
--- a/mlir/lib/Bindings/Python/Rewrite.cpp
+++ b/mlir/lib/Bindings/Python/Rewrite.cpp
@@ -205,7 +205,7 @@ public:
nb::object res = f(opView, PyPatternRewriter(rewriter));
return logicalResultFromObject(res);
};
- MlirRewritePattern pattern = mlirOpRewritePattenCreate(
+ MlirRewritePattern pattern = mlirOpRewritePatternCreate(
rootName, benefit, ctx, callbacks, matchAndRewrite.ptr(),
/* nGeneratedNames */ 0,
/* generatedNames */ nullptr);
diff --git a/mlir/lib/CAPI/Transforms/Rewrite.cpp b/mlir/lib/CAPI/Transforms/Rewrite.cpp
index 46c329d..41ceb15 100644
--- a/mlir/lib/CAPI/Transforms/Rewrite.cpp
+++ b/mlir/lib/CAPI/Transforms/Rewrite.cpp
@@ -341,7 +341,7 @@ private:
} // namespace mlir
-MlirRewritePattern mlirOpRewritePattenCreate(
+MlirRewritePattern mlirOpRewritePatternCreate(
MlirStringRef rootName, unsigned benefit, MlirContext context,
MlirRewritePatternCallbacks callbacks, void *userData,
size_t nGeneratedNames, MlirStringRef *generatedNames) {
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index b215211..c03f3a5 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -484,5 +484,5 @@ void mlir::populateGpuToROCDLConversionPatterns(
GPUSubgroupBroadcastOpToROCDL>(converter);
patterns.add<GPUSubgroupSizeOpToROCDL>(converter, chipset);
- populateMathToROCDLConversionPatterns(converter, patterns);
+ populateMathToROCDLConversionPatterns(converter, patterns, chipset);
}
diff --git a/mlir/lib/Conversion/MathToROCDL/CMakeLists.txt b/mlir/lib/Conversion/MathToROCDL/CMakeLists.txt
index 2771955a..8cc3fde 100644
--- a/mlir/lib/Conversion/MathToROCDL/CMakeLists.txt
+++ b/mlir/lib/Conversion/MathToROCDL/CMakeLists.txt
@@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRMathToROCDL
Core
LINK_LIBS PUBLIC
+ MLIRAMDGPUUtils
MLIRDialectUtils
MLIRFuncDialect
MLIRGPUToGPURuntimeTransforms
diff --git a/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp b/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
index df219f3..a2dfc12 100644
--- a/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
+++ b/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
@@ -10,6 +10,8 @@
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/LLVMCommon/VectorPattern.h"
+#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
@@ -19,6 +21,7 @@
#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
+#include "llvm/Support/DebugLog.h"
#include "../GPUCommon/GPUOpsLowering.h"
#include "../GPUCommon/OpToFuncCallLowering.h"
@@ -42,8 +45,46 @@ static void populateOpPatterns(const LLVMTypeConverter &converter,
f32ApproxFunc, f16Func);
}
+struct ClampFOpConversion final
+ : public ConvertOpToLLVMPattern<math::ClampFOp> {
+ using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
+
+ LogicalResult
+ matchAndRewrite(math::ClampFOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ // Only f16 and f32 types are supported by fmed3
+ Type opTy = op.getType();
+ Type resultType = getTypeConverter()->convertType(opTy);
+
+ if (auto vectorType = dyn_cast<VectorType>(opTy))
+ opTy = vectorType.getElementType();
+
+ if (!isa<Float16Type, Float32Type>(opTy))
+ return rewriter.notifyMatchFailure(
+ op, "fmed3 only supports f16 and f32 types");
+
+ // Handle multi-dimensional vectors (converted to LLVM arrays)
+ if (auto arrayType = dyn_cast<LLVM::LLVMArrayType>(resultType))
+ return LLVM::detail::handleMultidimensionalVectors(
+ op.getOperation(), adaptor.getOperands(), *getTypeConverter(),
+ [&](Type llvm1DVectorTy, ValueRange operands) -> Value {
+ typename math::ClampFOp::Adaptor adaptor(operands);
+ return ROCDL::FMed3Op::create(rewriter, op.getLoc(), llvm1DVectorTy,
+ adaptor.getValue(), adaptor.getMin(),
+ adaptor.getMax());
+ },
+ rewriter);
+
+ // Handle 1D vectors and scalars directly
+ rewriter.replaceOpWithNewOp<ROCDL::FMed3Op>(op, op.getType(), op.getValue(),
+ op.getMin(), op.getMax());
+ return success();
+ }
+};
+
void mlir::populateMathToROCDLConversionPatterns(
- const LLVMTypeConverter &converter, RewritePatternSet &patterns) {
+ const LLVMTypeConverter &converter, RewritePatternSet &patterns,
+ std::optional<amdgpu::Chipset> chipset) {
// Handled by mathToLLVM: math::AbsIOp
// Handled by mathToLLVM: math::AbsFOp
// Handled by mathToLLVM: math::CopySignOp
@@ -118,15 +159,21 @@ void mlir::populateMathToROCDLConversionPatterns(
// worth creating a separate pass for it.
populateOpPatterns<arith::RemFOp>(converter, patterns, "__ocml_fmod_f32",
"__ocml_fmod_f64", "__ocml_fmod_f16");
+
+ if (chipset.has_value() && chipset->majorVersion >= 9) {
+ patterns.add<ClampFOpConversion>(converter);
+ } else {
+ LDBG() << "Chipset dependent patterns were not added";
+ }
}
-namespace {
-struct ConvertMathToROCDLPass
- : public impl::ConvertMathToROCDLBase<ConvertMathToROCDLPass> {
- ConvertMathToROCDLPass() = default;
+struct ConvertMathToROCDLPass final
+ : impl::ConvertMathToROCDLBase<ConvertMathToROCDLPass> {
+ using impl::ConvertMathToROCDLBase<
+ ConvertMathToROCDLPass>::ConvertMathToROCDLBase;
+
void runOnOperation() override;
};
-} // namespace
void ConvertMathToROCDLPass::runOnOperation() {
auto m = getOperation();
@@ -135,10 +182,21 @@ void ConvertMathToROCDLPass::runOnOperation() {
RewritePatternSet patterns(&getContext());
LowerToLLVMOptions options(ctx, DataLayout(m));
LLVMTypeConverter converter(ctx, options);
- populateMathToROCDLConversionPatterns(converter, patterns);
+
+ FailureOr<amdgpu::Chipset> maybeChipset;
+ if (!chipset.empty()) {
+ maybeChipset = amdgpu::Chipset::parse(chipset);
+ if (failed(maybeChipset))
+ return signalPassFailure();
+ }
+ populateMathToROCDLConversionPatterns(
+ converter, patterns,
+ succeeded(maybeChipset) ? std::optional(*maybeChipset) : std::nullopt);
+
ConversionTarget target(getContext());
- target.addLegalDialect<BuiltinDialect, func::FuncDialect,
- vector::VectorDialect, LLVM::LLVMDialect>();
+ target
+ .addLegalDialect<BuiltinDialect, func::FuncDialect, vector::VectorDialect,
+ LLVM::LLVMDialect, ROCDL::ROCDLDialect>();
target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FAbsOp,
LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp,
LLVM::Log10Op, LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp,
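
Note: the new ClampFOpConversion above lowers math.clampf to the ROCDL fmed3 op when a gfx9+ chipset is supplied, and only for f16/f32 element types. A minimal sketch of the intended lowering (op spellings approximate, assuming an f32 scalar and a gfx942 target):

    // Input
    %r = math.clampf %x, %lo, %hi : f32
    // After convert-math-to-rocdl{chipset=gfx942}, roughly:
    %r = rocdl.fmed3 %x, %lo, %hi : f32
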
diff --git a/mlir/lib/Conversion/XeGPUToXeVM/CMakeLists.txt b/mlir/lib/Conversion/XeGPUToXeVM/CMakeLists.txt
index 84b2580..dd9edc4 100644
--- a/mlir/lib/Conversion/XeGPUToXeVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/XeGPUToXeVM/CMakeLists.txt
@@ -21,6 +21,7 @@ add_mlir_conversion_library(MLIRXeGPUToXeVM
MLIRIndexDialect
MLIRSCFDialect
MLIRXeGPUDialect
+ MLIRXeGPUUtils
MLIRPass
MLIRTransforms
MLIRSCFTransforms
diff --git a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
index ddcbc44..fcbf66d 100644
--- a/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
+++ b/mlir/lib/Conversion/XeGPUToXeVM/XeGPUToXeVM.cpp
@@ -22,6 +22,7 @@
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
+#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/STLExtras.h"
@@ -63,6 +64,7 @@ static int32_t getNumericXeVMAddrSpace(xegpu::MemorySpace xeGpuMemspace) {
case xegpu::MemorySpace::SLM:
return static_cast<int>(xevm::AddrSpace::SHARED);
}
+ llvm_unreachable("Unknown XeGPU memory space");
}
// Get same bitwidth flat vector type of new element type.
@@ -186,6 +188,7 @@ class CreateNdDescToXeVMPattern
int64_t rank = mixedSizes.size();
if (rank != 2)
return rewriter.notifyMatchFailure(op, "Expected 2D shape.");
+
auto sourceTy = source.getType();
auto sourceMemrefTy = dyn_cast<MemRefType>(sourceTy);
// If source is a memref, we need to extract the aligned pointer as index.
@@ -364,10 +367,11 @@ class LoadStorePrefetchNdToXeVMPattern : public OpConversionPattern<OpType> {
// Add a builder that creates
// offset * elemByteSize + baseAddr
-static Value addOffset(ConversionPatternRewriter &rewriter, Location loc,
- Value baseAddr, Value offset, int64_t elemByteSize) {
+static Value addOffsetToBaseAddr(ConversionPatternRewriter &rewriter,
+ Location loc, Value baseAddr, Value offset,
+ int64_t elemByteSize) {
Value byteSize = arith::ConstantIntOp::create(
- rewriter, loc, rewriter.getI64Type(), elemByteSize);
+ rewriter, loc, baseAddr.getType(), elemByteSize);
Value byteOffset = arith::MulIOp::create(rewriter, loc, offset, byteSize);
Value newAddr = arith::AddIOp::create(rewriter, loc, baseAddr, byteOffset);
return newAddr;
@@ -443,7 +447,8 @@ class LoadStoreToXeVMPattern : public OpConversionPattern<OpType> {
// If offset is provided, we add them to the base pointer.
// Offset is in number of elements, we need to multiply by
// element byte size.
- basePtrI64 = addOffset(rewriter, loc, basePtrI64, offset, elemByteSize);
+ basePtrI64 =
+ addOffsetToBaseAddr(rewriter, loc, basePtrI64, offset, elemByteSize);
}
// Convert base pointer (i64) to LLVM pointer type.
Value basePtrLLVM =
@@ -506,6 +511,147 @@ class LoadStoreToXeVMPattern : public OpConversionPattern<OpType> {
}
};
+// Lower xegpu::CreateMemDescOp to memref::ViewOp. Since SLM access instructions
+// on Xe2 and Xe3 operate on 32-bit or 64-bit units, all data types smaller than
+// 32 bits will be converted to 32 bits.
+class CreateMemDescOpPattern final
+ : public OpConversionPattern<xegpu::CreateMemDescOp> {
+public:
+ using OpConversionPattern<xegpu::CreateMemDescOp>::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(xegpu::CreateMemDescOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+
+ auto resTy = op.getMemDesc();
+
+ // Create the result MemRefType with the same shape, element type, and
+ // memory space
+ auto newResTy = getTypeConverter()->convertType<MemRefType>(resTy);
+
+ Value zero = arith::ConstantIndexOp::create(rewriter, op.getLoc(), 0);
+ auto viewOp = memref::ViewOp::create(rewriter, op.getLoc(), newResTy,
+ op.getSource(), zero, ValueRange());
+ rewriter.replaceOp(op, viewOp);
+ return success();
+ }
+};
+
+template <typename OpType,
+ typename = std::enable_if_t<llvm::is_one_of<
+ OpType, xegpu::LoadMatrixOp, xegpu::StoreMatrixOp>::value>>
+class LoadStoreMatrixToXeVMPattern : public OpConversionPattern<OpType> {
+ using OpConversionPattern<OpType>::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(OpType op, typename OpType::Adaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+
+ SmallVector<OpFoldResult> offsets = op.getMixedOffsets();
+ if (offsets.empty())
+ return rewriter.notifyMatchFailure(op, "Expected offset to be provided.");
+
+ auto loc = op.getLoc();
+ auto ctxt = rewriter.getContext();
+ Value basePtrStruct = adaptor.getMemDesc();
+ Value mdescVal = op.getMemDesc();
+ // Load result or Store value Type can be vector or scalar.
+ Value data;
+ if constexpr (std::is_same_v<OpType, xegpu::LoadMatrixOp>)
+ data = op.getResult();
+ else
+ data = adaptor.getData();
+ VectorType valOrResVecTy = dyn_cast<VectorType>(data.getType());
+ if (!valOrResVecTy)
+ valOrResVecTy = VectorType::get(1, data.getType());
+
+ int64_t elemBitWidth =
+ valOrResVecTy.getElementType().getIntOrFloatBitWidth();
+ // Element type must be multiple of 8 bits.
+ if (elemBitWidth % 8 != 0)
+ return rewriter.notifyMatchFailure(
+ op, "Expected element type bit width to be multiple of 8.");
+ int64_t elemByteSize = elemBitWidth / 8;
+
+ // Default memory space is SLM.
+ LLVM::LLVMPointerType ptrTypeLLVM = LLVM::LLVMPointerType::get(
+ ctxt, getNumericXeVMAddrSpace(xegpu::MemorySpace::SLM));
+
+ auto mdescTy = cast<xegpu::MemDescType>(mdescVal.getType());
+
+ Value basePtrLLVM = memref::ExtractAlignedPointerAsIndexOp::create(
+ rewriter, loc, basePtrStruct);
+
+ // Convert base pointer (ptr) to i32
+ Value basePtrI32 = arith::IndexCastUIOp::create(
+ rewriter, loc, rewriter.getI32Type(), basePtrLLVM);
+
+ Value linearOffset = mdescTy.getLinearOffsets(rewriter, loc, offsets);
+ linearOffset = arith::IndexCastUIOp::create(
+ rewriter, loc, rewriter.getI32Type(), linearOffset);
+ basePtrI32 = addOffsetToBaseAddr(rewriter, loc, basePtrI32, linearOffset,
+ elemByteSize);
+
+ // convert base pointer (i32) to LLVM pointer type
+ basePtrLLVM =
+ LLVM::IntToPtrOp::create(rewriter, loc, ptrTypeLLVM, basePtrI32);
+
+ if (op.getSubgroupBlockIoAttr()) {
+ // if the attribute 'subgroup_block_io' is set to true, it lowers to
+ // xevm.blockload
+
+ Type intElemTy = rewriter.getIntegerType(elemBitWidth);
+ VectorType intVecTy =
+ VectorType::get(valOrResVecTy.getShape(), intElemTy);
+
+ if constexpr (std::is_same_v<OpType, xegpu::LoadMatrixOp>) {
+ Value loadOp =
+ xevm::BlockLoadOp::create(rewriter, loc, intVecTy, basePtrLLVM);
+ if (intVecTy != valOrResVecTy) {
+ loadOp =
+ vector::BitCastOp::create(rewriter, loc, valOrResVecTy, loadOp);
+ }
+ rewriter.replaceOp(op, loadOp);
+ } else {
+ Value dataToStore = adaptor.getData();
+ if (valOrResVecTy != intVecTy) {
+ dataToStore =
+ vector::BitCastOp::create(rewriter, loc, intVecTy, dataToStore);
+ }
+ xevm::BlockStoreOp::create(rewriter, loc, basePtrLLVM, dataToStore,
+ nullptr);
+ rewriter.eraseOp(op);
+ }
+ return success();
+ }
+
+ if (valOrResVecTy.getNumElements() >= 1) {
+ auto chipOpt = xegpu::getChipStr(op);
+ if (!chipOpt || (*chipOpt != "pvc" && *chipOpt != "bmg")) {
+ // the lowering for chunk load only works for pvc and bmg
+ return rewriter.notifyMatchFailure(
+ op, "The lowering is specific to pvc or bmg.");
+ }
+ }
+
+ if constexpr (std::is_same_v<OpType, xegpu::LoadMatrixOp>) {
+ // if the size of valOrResVecTy is 1, it lowers to a scalar load/store
+ // operation. LLVM load/store does not support vector of size 1, so we
+ // need to handle this case separately.
+ auto scalarTy = valOrResVecTy.getElementType();
+ LLVM::LoadOp loadOp;
+ if (valOrResVecTy.getNumElements() == 1)
+ loadOp = LLVM::LoadOp::create(rewriter, loc, scalarTy, basePtrLLVM);
+ else
+ loadOp =
+ LLVM::LoadOp::create(rewriter, loc, valOrResVecTy, basePtrLLVM);
+ rewriter.replaceOp(op, loadOp);
+ } else {
+ LLVM::StoreOp::create(rewriter, loc, adaptor.getData(), basePtrLLVM);
+ rewriter.eraseOp(op);
+ }
+ return success();
+ }
+};
+
class PrefetchToXeVMPattern : public OpConversionPattern<xegpu::PrefetchOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult
@@ -548,8 +694,8 @@ class PrefetchToXeVMPattern : public OpConversionPattern<xegpu::PrefetchOp> {
op, "Expected element type bit width to be multiple of 8.");
elemByteSize = elemBitWidth / 8;
}
- basePtrI64 =
- addOffset(rewriter, loc, basePtrI64, offsets, elemByteSize);
+ basePtrI64 = addOffsetToBaseAddr(rewriter, loc, basePtrI64, offsets,
+ elemByteSize);
}
}
// Default memory space is global.
@@ -786,6 +932,13 @@ struct ConvertXeGPUToXeVMPass
auto i32Type = IntegerType::get(&getContext(), 32);
return VectorType::get(8, i32Type);
});
+ // Convert MemDescType into flattened MemRefType for SLM
+ typeConverter.addConversion([&](xegpu::MemDescType type) -> Type {
+ Type elemTy = type.getElementType();
+ int numElems = type.getNumElements();
+ return MemRefType::get(numElems, elemTy, AffineMap(), 3);
+ });
+
typeConverter.addConversion([&](MemRefType type) -> Type {
// Convert MemRefType to i64 type.
return IntegerType::get(&getContext(), 64);
@@ -940,6 +1093,9 @@ void mlir::populateXeGPUToXeVMConversionPatterns(
LoadStoreToXeVMPattern<xegpu::LoadGatherOp>,
LoadStoreToXeVMPattern<xegpu::StoreScatterOp>>(
typeConverter, patterns.getContext());
+ patterns.add<LoadStoreMatrixToXeVMPattern<xegpu::LoadMatrixOp>,
+ LoadStoreMatrixToXeVMPattern<xegpu::StoreMatrixOp>,
+ CreateMemDescOpPattern>(typeConverter, patterns.getContext());
patterns.add<FenceToXeVMPattern, DpasToXeVMPattern>(typeConverter,
patterns.getContext());
}
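
Note on the new MemDescType conversion above: the descriptor type is flattened into a 1-D memref in address space 3 (SLM), so a hypothetical !xegpu.mem_desc<32x16xf16> would convert to memref<512xf16, 3>, and xegpu.create_mem_desc then lowers to a zero-offset memref.view over that SLM buffer.
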
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index c798adb..61166db 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -339,6 +339,25 @@ void RawBufferAtomicCmpswapOp::getCanonicalizationPatterns(
}
//===----------------------------------------------------------------------===//
+// ScaledExtPacked816Op
+//===----------------------------------------------------------------------===//
+LogicalResult ScaledExtPacked816Op::verify() {
+ int blockSize = getBlockSize();
+ assert((blockSize == 16 || blockSize == 32) && "invalid block size");
+ int firstScaleByte = getFirstScaleByte();
+ if (blockSize == 16 && !llvm::is_contained({0, 1}, firstScaleByte)) {
+ return emitOpError(
+ "blockSize of 16 can only have firstScaleByte be 0 or 1.");
+ }
+ if (blockSize == 32 && !llvm::is_contained({0, 2}, firstScaleByte)) {
+ return emitOpError(
+ "blockSize of 32 can only have firstScaleByte be 0 or 2.");
+ }
+
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
// WMMAOp
//===----------------------------------------------------------------------===//
LogicalResult WMMAOp::verify() {
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 749e2ba..e0a53cd 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -2600,6 +2600,65 @@ static LogicalResult foldLoopBounds(AffineForOp forOp) {
return success(folded);
}
+/// Returns constant trip count in trivial cases.
+static std::optional<uint64_t> getTrivialConstantTripCount(AffineForOp forOp) {
+ int64_t step = forOp.getStepAsInt();
+ if (!forOp.hasConstantBounds() || step <= 0)
+ return std::nullopt;
+ int64_t lb = forOp.getConstantLowerBound();
+ int64_t ub = forOp.getConstantUpperBound();
+ return ub - lb <= 0 ? 0 : (ub - lb + step - 1) / step;
+}
+
+/// Fold the empty loop.
+static SmallVector<OpFoldResult> AffineForEmptyLoopFolder(AffineForOp forOp) {
+ if (!llvm::hasSingleElement(*forOp.getBody()))
+ return {};
+ if (forOp.getNumResults() == 0)
+ return {};
+ std::optional<uint64_t> tripCount = getTrivialConstantTripCount(forOp);
+ if (tripCount == 0) {
+ // The initial values of the iteration arguments would be the op's
+ // results.
+ return forOp.getInits();
+ }
+ SmallVector<Value, 4> replacements;
+ auto yieldOp = cast<AffineYieldOp>(forOp.getBody()->getTerminator());
+ auto iterArgs = forOp.getRegionIterArgs();
+ bool hasValDefinedOutsideLoop = false;
+ bool iterArgsNotInOrder = false;
+ for (unsigned i = 0, e = yieldOp->getNumOperands(); i < e; ++i) {
+ Value val = yieldOp.getOperand(i);
+ BlockArgument *iterArgIt = llvm::find(iterArgs, val);
+ // TODO: It should be possible to perform a replacement by computing the
+ // last value of the IV based on the bounds and the step.
+ if (val == forOp.getInductionVar())
+ return {};
+ if (iterArgIt == iterArgs.end()) {
+ // `val` is defined outside of the loop.
+ assert(forOp.isDefinedOutsideOfLoop(val) &&
+ "must be defined outside of the loop");
+ hasValDefinedOutsideLoop = true;
+ replacements.push_back(val);
+ } else {
+ unsigned pos = std::distance(iterArgs.begin(), iterArgIt);
+ if (pos != i)
+ iterArgsNotInOrder = true;
+ replacements.push_back(forOp.getInits()[pos]);
+ }
+ }
+ // Bail out when the trip count is unknown and the loop returns any value
+ // defined outside of the loop or any iterArg out of order.
+ if (!tripCount.has_value() &&
+ (hasValDefinedOutsideLoop || iterArgsNotInOrder))
+ return {};
+ // Bail out when the loop iterates more than once and it returns any iterArg
+ // out of order.
+ if (tripCount.has_value() && tripCount.value() >= 2 && iterArgsNotInOrder)
+ return {};
+ return llvm::to_vector_of<OpFoldResult>(replacements);
+}
+
/// Canonicalize the bounds of the given loop.
static LogicalResult canonicalizeLoopBounds(AffineForOp forOp) {
SmallVector<Value, 4> lbOperands(forOp.getLowerBoundOperands());
@@ -2631,79 +2690,30 @@ static LogicalResult canonicalizeLoopBounds(AffineForOp forOp) {
return success();
}
-namespace {
-/// Returns constant trip count in trivial cases.
-static std::optional<uint64_t> getTrivialConstantTripCount(AffineForOp forOp) {
- int64_t step = forOp.getStepAsInt();
- if (!forOp.hasConstantBounds() || step <= 0)
- return std::nullopt;
- int64_t lb = forOp.getConstantLowerBound();
- int64_t ub = forOp.getConstantUpperBound();
- return ub - lb <= 0 ? 0 : (ub - lb + step - 1) / step;
+/// Returns true if the affine.for has zero iterations in trivial cases.
+static bool hasTrivialZeroTripCount(AffineForOp op) {
+ return getTrivialConstantTripCount(op) == 0;
}
-/// This is a pattern to fold trivially empty loop bodies.
-/// TODO: This should be moved into the folding hook.
-struct AffineForEmptyLoopFolder : public OpRewritePattern<AffineForOp> {
- using OpRewritePattern<AffineForOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(AffineForOp forOp,
- PatternRewriter &rewriter) const override {
- // Check that the body only contains a yield.
- if (!llvm::hasSingleElement(*forOp.getBody()))
- return failure();
- if (forOp.getNumResults() == 0)
- return success();
- std::optional<uint64_t> tripCount = getTrivialConstantTripCount(forOp);
- if (tripCount == 0) {
- // The initial values of the iteration arguments would be the op's
- // results.
- rewriter.replaceOp(forOp, forOp.getInits());
- return success();
- }
- SmallVector<Value, 4> replacements;
- auto yieldOp = cast<AffineYieldOp>(forOp.getBody()->getTerminator());
- auto iterArgs = forOp.getRegionIterArgs();
- bool hasValDefinedOutsideLoop = false;
- bool iterArgsNotInOrder = false;
- for (unsigned i = 0, e = yieldOp->getNumOperands(); i < e; ++i) {
- Value val = yieldOp.getOperand(i);
- auto *iterArgIt = llvm::find(iterArgs, val);
- // TODO: It should be possible to perform a replacement by computing the
- // last value of the IV based on the bounds and the step.
- if (val == forOp.getInductionVar())
- return failure();
- if (iterArgIt == iterArgs.end()) {
- // `val` is defined outside of the loop.
- assert(forOp.isDefinedOutsideOfLoop(val) &&
- "must be defined outside of the loop");
- hasValDefinedOutsideLoop = true;
- replacements.push_back(val);
- } else {
- unsigned pos = std::distance(iterArgs.begin(), iterArgIt);
- if (pos != i)
- iterArgsNotInOrder = true;
- replacements.push_back(forOp.getInits()[pos]);
- }
- }
- // Bail out when the trip count is unknown and the loop returns any value
- // defined outside of the loop or any iterArg out of order.
- if (!tripCount.has_value() &&
- (hasValDefinedOutsideLoop || iterArgsNotInOrder))
- return failure();
- // Bail out when the loop iterates more than once and it returns any iterArg
- // out of order.
- if (tripCount.has_value() && tripCount.value() >= 2 && iterArgsNotInOrder)
- return failure();
- rewriter.replaceOp(forOp, replacements);
- return success();
+LogicalResult AffineForOp::fold(FoldAdaptor adaptor,
+ SmallVectorImpl<OpFoldResult> &results) {
+ bool folded = succeeded(foldLoopBounds(*this));
+ folded |= succeeded(canonicalizeLoopBounds(*this));
+ if (hasTrivialZeroTripCount(*this) && getNumResults() != 0) {
+ // The initial values of the loop-carried variables (iter_args) are the
+ // results of the op. But this must be avoided for an affine.for op that
+ // does not return any results. Since ops that do not return results cannot
+ // be folded away, we would enter an infinite loop of folds on the same
+ // affine.for op.
+ results.assign(getInits().begin(), getInits().end());
+ folded = true;
}
-};
-} // namespace
-
-void AffineForOp::getCanonicalizationPatterns(RewritePatternSet &results,
- MLIRContext *context) {
- results.add<AffineForEmptyLoopFolder>(context);
+ SmallVector<OpFoldResult> foldResults = AffineForEmptyLoopFolder(*this);
+ if (!foldResults.empty()) {
+ results.assign(foldResults);
+ folded = true;
+ }
+ return success(folded);
}
OperandRange AffineForOp::getEntrySuccessorOperands(RegionBranchPoint point) {
@@ -2746,27 +2756,6 @@ void AffineForOp::getSuccessorRegions(
regions.push_back(RegionSuccessor(getResults()));
}
-/// Returns true if the affine.for has zero iterations in trivial cases.
-static bool hasTrivialZeroTripCount(AffineForOp op) {
- return getTrivialConstantTripCount(op) == 0;
-}
-
-LogicalResult AffineForOp::fold(FoldAdaptor adaptor,
- SmallVectorImpl<OpFoldResult> &results) {
- bool folded = succeeded(foldLoopBounds(*this));
- folded |= succeeded(canonicalizeLoopBounds(*this));
- if (hasTrivialZeroTripCount(*this) && getNumResults() != 0) {
- // The initial values of the loop-carried variables (iter_args) are the
- // results of the op. But this must be avoided for an affine.for op that
- // does not return any results. Since ops that do not return results cannot
- // be folded away, we would enter an infinite loop of folds on the same
- // affine.for op.
- results.assign(getInits().begin(), getInits().end());
- folded = true;
- }
- return success(folded);
-}
-
AffineBound AffineForOp::getLowerBound() {
return AffineBound(*this, getLowerBoundOperands(), getLowerBoundMap());
}
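
The practical effect of moving the empty-loop folding into AffineForOp::fold (replacing the removed AffineForEmptyLoopFolder canonicalization pattern) is that a loop whose body merely yields its iter_args now folds to its init values during folding. An illustrative sketch:

    %res = affine.for %i = 0 to 128 iter_args(%acc = %init) -> (f32) {
      affine.yield %acc : f32
    }
    // Folds so that uses of %res become uses of %init, since the loop
    // yields its loop-carried value unchanged on every iteration.
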
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp
index 70faa71..bc17990 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/DropEquivalentBufferResults.cpp
@@ -41,18 +41,37 @@ namespace bufferization {
using namespace mlir;
-/// Return the unique ReturnOp that terminates `funcOp`.
-/// Return nullptr if there is no such unique ReturnOp.
-static func::ReturnOp getAssumedUniqueReturnOp(func::FuncOp funcOp) {
- func::ReturnOp returnOp;
+/// Get all the ReturnOp in the funcOp.
+static SmallVector<func::ReturnOp> getReturnOps(func::FuncOp funcOp) {
+ SmallVector<func::ReturnOp> returnOps;
for (Block &b : funcOp.getBody()) {
if (auto candidateOp = dyn_cast<func::ReturnOp>(b.getTerminator())) {
- if (returnOp)
- return nullptr;
- returnOp = candidateOp;
+ returnOps.push_back(candidateOp);
}
}
- return returnOp;
+ return returnOps;
+}
+
+/// Get the operands at the specified position for all returnOps.
+static SmallVector<Value>
+getReturnOpsOperandInPos(ArrayRef<func::ReturnOp> returnOps, size_t pos) {
+ return llvm::map_to_vector(returnOps, [&](func::ReturnOp returnOp) {
+ return returnOp.getOperand(pos);
+ });
+}
+
+/// Check if all given values are the same buffer as the block argument (modulo
+/// cast ops).
+static bool operandsEqualFuncArgument(ArrayRef<Value> operands,
+ BlockArgument argument) {
+ for (Value val : operands) {
+ while (auto castOp = val.getDefiningOp<memref::CastOp>())
+ val = castOp.getSource();
+
+ if (val != argument)
+ return false;
+ }
+ return true;
}
LogicalResult
@@ -72,40 +91,45 @@ mlir::bufferization::dropEquivalentBufferResults(ModuleOp module) {
for (auto funcOp : module.getOps<func::FuncOp>()) {
if (funcOp.isExternal() || funcOp.isPublic())
continue;
- func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp);
- // TODO: Support functions with multiple blocks.
- if (!returnOp)
+ SmallVector<func::ReturnOp> returnOps = getReturnOps(funcOp);
+ if (returnOps.empty())
continue;
// Compute erased results.
- SmallVector<Value> newReturnValues;
- BitVector erasedResultIndices(funcOp.getFunctionType().getNumResults());
+ size_t numReturnOps = returnOps.size();
+ size_t numReturnValues = funcOp.getFunctionType().getNumResults();
+ SmallVector<SmallVector<Value>> newReturnValues(numReturnOps);
+ BitVector erasedResultIndices(numReturnValues);
DenseMap<int64_t, int64_t> resultToArgs;
- for (const auto &it : llvm::enumerate(returnOp.getOperands())) {
+ for (size_t i = 0; i < numReturnValues; ++i) {
bool erased = false;
+ SmallVector<Value> returnOperands =
+ getReturnOpsOperandInPos(returnOps, i);
for (BlockArgument bbArg : funcOp.getArguments()) {
- Value val = it.value();
- while (auto castOp = val.getDefiningOp<memref::CastOp>())
- val = castOp.getSource();
-
- if (val == bbArg) {
- resultToArgs[it.index()] = bbArg.getArgNumber();
+ if (operandsEqualFuncArgument(returnOperands, bbArg)) {
+ resultToArgs[i] = bbArg.getArgNumber();
erased = true;
break;
}
}
if (erased) {
- erasedResultIndices.set(it.index());
+ erasedResultIndices.set(i);
} else {
- newReturnValues.push_back(it.value());
+ for (auto [newReturnValue, operand] :
+ llvm::zip(newReturnValues, returnOperands)) {
+ newReturnValue.push_back(operand);
+ }
}
}
// Update function.
if (failed(funcOp.eraseResults(erasedResultIndices)))
return failure();
- returnOp.getOperandsMutable().assign(newReturnValues);
+
+ for (auto [returnOp, newReturnValue] :
+ llvm::zip(returnOps, newReturnValues))
+ returnOp.getOperandsMutable().assign(newReturnValue);
// Update function calls.
for (func::CallOp callOp : callerMap[funcOp]) {
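
With getAssumedUniqueReturnOp replaced by getReturnOps, the pass now also handles private functions whose control flow reaches several return-terminated blocks; a result is dropped only if every return passes the same argument buffer (modulo memref.cast). Illustrative IR for a function that now qualifies:

    func.func private @f(%arg0: memref<4xf32>, %c: i1) -> memref<?xf32> {
      %0 = memref.cast %arg0 : memref<4xf32> to memref<?xf32>
      cf.cond_br %c, ^bb1, ^bb2
    ^bb1:
      return %0 : memref<?xf32>
    ^bb2:
      return %0 : memref<?xf32>
    }
    // Both returns trace back to %arg0 through the cast, so the result is
    // erased and callers reuse the memref they passed in.
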
diff --git a/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt
index 70a9c77..ec68acf 100644
--- a/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt
@@ -1,5 +1,6 @@
add_mlir_dialect_library(MLIRGPUPipelines
GPUToNVVMPipeline.cpp
+ GPUToXeVMPipeline.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
@@ -11,12 +12,17 @@ add_mlir_dialect_library(MLIRGPUPipelines
MLIRTransforms
MLIRLinalgTransforms
MLIRAffineToStandard
+ MLIRGPUToLLVMSPV
MLIRGPUToNVVMTransforms
MLIRIndexToLLVM
MLIRMathToLLVM
+ MLIRMathToXeVM
MLIRNVGPUToNVVM
MLIRNVVMToLLVM
MLIRReconcileUnrealizedCasts
MLIRSCFToControlFlow
MLIRVectorToSCF
+ MLIRXeGPUTransforms
+ MLIRXeGPUToXeVM
+ MLIRXeVMToLLVM
)
diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp
new file mode 100644
index 0000000..1a1485b
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp
@@ -0,0 +1,139 @@
+//===- GPUToXeVMPipeline.cpp - Lowering pipeline to XeVM/LLVM -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass for testing the lowering to XeVM as a generally
+// usable sink pass. If XeGPU ops are used, it expects the MLIR code to have
+// XeGPU ops already embedded in gpu code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/MathToXeVM/MathToXeVM.h"
+#include "mlir/Conversion/Passes.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
+#include "mlir/Conversion/XeGPUToXeVM/XeGPUToXeVM.h"
+#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/GPU/Pipelines/Passes.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
+#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Pass/PassOptions.h"
+#include "mlir/Target/LLVM/XeVM/Target.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+
+namespace {
+//===----------------------------------------------------------------------===//
+// Pre-GPU common pipeline for both Host and GPU.
+//===----------------------------------------------------------------------===//
+void buildPreGPUCommonPassPipeline(
+ OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
+ // builtin.module scope passes.
+ pm.addPass(createCSEPass());
+ pm.addPass(createConvertVectorToSCFPass());
+ {
+ GpuXeVMAttachTargetOptions xevmTargetOptions;
+ xevmTargetOptions.moduleMatcher = options.xevmModuleMatcher;
+ xevmTargetOptions.triple = options.zebinTriple;
+ xevmTargetOptions.chip = options.zebinChip;
+ xevmTargetOptions.optLevel = options.optLevel;
+ xevmTargetOptions.cmdOptions = options.cmdOptions;
+ pm.addPass(createGpuXeVMAttachTarget(xevmTargetOptions));
+ }
+ pm.addPass(createLowerAffinePass());
+ pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());
+}
+
+//===----------------------------------------------------------------------===//
+// GPUModule-specific stuff.
+//===----------------------------------------------------------------------===//
+void buildGPUPassPipeline(OpPassManager &pm,
+ const mlir::gpu::GPUToXeVMPipelineOptions &options) {
+ if (options.xegpuOpLevel == "workgroup") {
+ pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUWgToSgDistribute());
+ pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUBlocking());
+ pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+ }
+ if (options.xegpuOpLevel == "subgroup" ||
+ options.xegpuOpLevel == "workgroup") {
+ pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUPropagateLayout());
+ pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUSubgroupDistribute());
+ pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(createLoopInvariantCodeMotionPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUVectorLinearize());
+ }
+ pm.addNestedPass<gpu::GPUModuleOp>(createConvertMathToXeVM());
+ pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeGPUToXeVMPass());
+ {
+ ConvertGpuOpsToLLVMSPVOpsOptions gpuToLLVMSPVOptions;
+ gpuToLLVMSPVOptions.use64bitIndex = options.use64bitIndex;
+ pm.addNestedPass<gpu::GPUModuleOp>(
+ createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions));
+ }
+ pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+ pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
+}
+
+//===----------------------------------------------------------------------===//
+// Post-GPU pipeline for both Host and GPU.
+//===----------------------------------------------------------------------===//
+void buildPostGPUCommonPassPipeline(
+ OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
+ // builtin.module scope passes.
+ pm.addPass(createSCFToControlFlowPass());
+ pm.addPass(memref::createExpandStridedMetadataPass());
+ {
+ GpuToLLVMConversionPassOptions gpuToLLVMOptions;
+ gpuToLLVMOptions.hostBarePtrCallConv = options.hostBarePtrCallConv;
+ gpuToLLVMOptions.kernelBarePtrCallConv = options.kernelBarePtrCallConv;
+ pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMOptions));
+ }
+ pm.addPass(createLowerAffinePass());
+ pm.addPass(createConvertToLLVMPass());
+ pm.addPass(createReconcileUnrealizedCastsPass());
+ // gpu-module-to-binary
+ {
+ GpuModuleToBinaryPassOptions gpuToModuleBinOptions;
+ gpuToModuleBinOptions.compilationTarget = options.binaryFormat;
+ gpuToModuleBinOptions.cmdOptions = options.cmdOptions;
+ pm.addPass(createGpuModuleToBinaryPass(gpuToModuleBinOptions));
+ }
+}
+} // namespace
+
+void mlir::gpu::buildLowerToXeVMPassPipeline(
+ OpPassManager &pm, const GPUToXeVMPipelineOptions &options) {
+ // Pre-GPU common pipelines.
+ buildPreGPUCommonPassPipeline(pm, options);
+
+ // GPUModule-specific stuff.
+ buildGPUPassPipeline(pm, options);
+
+ // Post-GPU pipeline for both Host and GPU.
+ buildPostGPUCommonPassPipeline(pm, options);
+}
+
+void mlir::gpu::registerGPUToXeVMPipeline() {
+ PassPipelineRegistration<GPUToXeVMPipelineOptions>(
+ "gpu-lower-to-xevm-pipeline",
+ "The default GPU to XeVM lowering pipeline. It starts by lowering GPU "
+ "code to the "
+ "specified compilation target (default is fatbin) then lowers the host "
+ "code.",
+ buildLowerToXeVMPassPipeline);
+}
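
Once registerGPUToXeVMPipeline has run, the pipeline is available from mlir-opt. A hypothetical invocation (the exact option keys derive from GPUToXeVMPipelineOptions and may be spelled differently):

    mlir-opt input.mlir --gpu-lower-to-xevm-pipeline='xegpu-op-level=workgroup'
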
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 6192d79..9a8a63e 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -2457,26 +2457,24 @@ transform::PadTilingInterfaceOp::apply(transform::TransformRewriter &rewriter,
}
// Set options.
- TilingInterface paddedOp;
PadTilingInterfaceOptions options;
options.setPaddingValues(paddingValues)
.setPaddingSizes(getMixedPaddingSizes())
.setPadToMultipleOf(getPadToMultipleOf());
- // Apply padding.
- SmallVector<tensor::PadOp> newPadOps;
- FailureOr<TilingInterface> maybePaddedOp = rewriteAsPaddedOp(
- rewriter, cast<TilingInterface>(targetOp.getOperation()), options,
- newPadOps);
- if (failed(maybePaddedOp)) {
+ auto maybePadOps = rewriteAsPaddedOp(
+ rewriter, cast<TilingInterface>(targetOp.getOperation()), options);
+ if (failed(maybePadOps)) {
auto diag = emitSilenceableError() << "failed to pad op";
diag.attachNote(target->getLoc()) << "target op";
return diag;
}
+ const auto &[paddedOperands, paddedOp, slicedResults] = maybePadOps.value();
// Set transform results.
- paddedOps.push_back(cast<TilingInterface>(maybePaddedOp->getOperation()));
- padOps.append(newPadOps.begin(), newPadOps.end());
+ paddedOps.push_back(paddedOp);
+ padOps.append(paddedOperands.begin(), paddedOperands.end());
+ rewriter.replaceOp(targetOp.getOperation(), slicedResults);
}
results.set(cast<OpResult>(getPadded()), paddedOps);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp
index 0956c5d..3e787a2 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/PadTilingInterface.cpp
@@ -95,10 +95,11 @@ static int64_t extractConstantMultiplier(AffineExpr expr) {
/// - affine_map<(d0, d1) -> (d0 * 3 + d1)>
/// In the future, more general interfaces can be devised to encode similar
/// shape evolutions and map between an op and its operands.
-SmallVector<OpFoldResult> linalg::computePaddedShape(
- RewriterBase &rewriter, TypedValue<RankedTensorType> v,
- AffineMap indexingMap, ArrayRef<OpFoldResult> indexingSizes,
- const PadTilingInterfaceOptions &options) {
+SmallVector<OpFoldResult>
+linalg::computePaddedShape(OpBuilder &builder, TypedValue<RankedTensorType> v,
+ AffineMap indexingMap,
+ ArrayRef<OpFoldResult> indexingSizes,
+ const PadTilingInterfaceOptions &options) {
Location loc = v.getLoc();
SmallVector<OpFoldResult> paddedShape;
auto tensorType = cast<RankedTensorType>(v.getType());
@@ -109,7 +110,7 @@ SmallVector<OpFoldResult> linalg::computePaddedShape(
// "Full-rank" padding specification.
SmallVector<OpFoldResult> paddingSizes =
- getFullRankPaddingSizes(rewriter, indexingSizes, options);
+ getFullRankPaddingSizes(builder, indexingSizes, options);
// For each dimension in the operand's shape, iterate over indexingSizes and
// add the various term contributions.
@@ -147,28 +148,27 @@ SmallVector<OpFoldResult> linalg::computePaddedShape(
OpFoldResult paddingDimOfr;
if (options.padToMultipleOf) {
AffineExpr d0, s0;
- bindDims(rewriter.getContext(), d0);
- bindSymbols(rewriter.getContext(), s0);
+ bindDims(builder.getContext(), d0);
+ bindSymbols(builder.getContext(), s0);
AffineMap ceilMap = AffineMap::get(1, 1, d0.ceilDiv(s0) * s0);
AffineMap composedMap = projectedMap.compose(ceilMap);
paddingDimOfr = affine::makeComposedFoldedAffineApply(
- rewriter, loc, composedMap,
- {indexingSizes[paddingDim], paddingSize},
+ builder, loc, composedMap, {indexingSizes[paddingDim], paddingSize},
/*composeAffineMin=*/true);
} else {
// Otherwise just set to paddingSize.
paddingDimOfr = affine::makeComposedFoldedAffineApply(
- rewriter, loc, projectedMap, paddingSize);
+ builder, loc, projectedMap, paddingSize);
}
// Adjust for the maximum accessed index, which is (paddingSize - 1) *
// multiplier.
AffineExpr d0;
- bindDims(rewriter.getContext(), d0);
+ bindDims(builder.getContext(), d0);
int64_t multiplier = extractConstantMultiplier(projectedMap.getResult(0));
AffineMap subtractMap = AffineMap::get(1, 0, d0 - multiplier);
OpFoldResult maxAccessIdx = affine::makeComposedFoldedAffineApply(
- rewriter, loc, subtractMap, {paddingDimOfr});
+ builder, loc, subtractMap, {paddingDimOfr});
terms.push_back(maxAccessIdx);
LLVM_DEBUG(DBGS() << "------new term: " << terms.back() << "\n");
@@ -177,19 +177,19 @@ SmallVector<OpFoldResult> linalg::computePaddedShape(
// If there are no terms, just return the dim.
if (terms.empty()) {
paddedShape[resultIndex] =
- createFoldedDimOp(rewriter, loc, v, resultIndex);
+ createFoldedDimOp(builder, loc, v, resultIndex);
continue;
}
// Sum individual terms' contributions.
SmallVector<AffineExpr> dims(terms.size());
- bindDimsList(rewriter.getContext(), MutableArrayRef{dims});
+ bindDimsList(builder.getContext(), MutableArrayRef{dims});
AffineExpr sumExpr = dims.front();
for (unsigned i = 1; i < dims.size(); ++i)
sumExpr = sumExpr + dims[i];
// Add 1 to the maximum accessed index and get the final padded size.
- OpFoldResult paddedDimOfr = affine::makeComposedFoldedAffineApply(
- rewriter, loc, sumExpr + 1, terms);
+ OpFoldResult paddedDimOfr =
+ affine::makeComposedFoldedAffineApply(builder, loc, sumExpr + 1, terms);
paddedShape[resultIndex] = paddedDimOfr;
}
@@ -198,7 +198,7 @@ SmallVector<OpFoldResult> linalg::computePaddedShape(
FailureOr<SmallVector<OpFoldResult>>
linalg::computeIndexingMapOpInterfacePaddedShape(
- RewriterBase &rewriter, OpOperand &operandToPad,
+ OpBuilder &builder, OpOperand &operandToPad,
ArrayRef<Range> iterationDomain, const PadTilingInterfaceOptions &options) {
auto transferOp =
llvm::dyn_cast<IndexingMapOpInterface>(operandToPad.getOwner());
@@ -206,9 +206,9 @@ linalg::computeIndexingMapOpInterfacePaddedShape(
return failure();
// clang-format off
- assert(llvm::all_of(iterationDomain, [&rewriter](Range r) {
- return r.offset == OpFoldResult(rewriter.getIndexAttr(0)) &&
- r.stride == OpFoldResult(rewriter.getIndexAttr(1));
+ assert(llvm::all_of(iterationDomain, [&builder](Range r) {
+ return r.offset == OpFoldResult(builder.getIndexAttr(0)) &&
+ r.stride == OpFoldResult(builder.getIndexAttr(1));
}) && "expected 0-offset 1-stride loop ranges");
// clang-format on
SmallVector<OpFoldResult> loopUpperBounds;
@@ -218,13 +218,13 @@ linalg::computeIndexingMapOpInterfacePaddedShape(
AffineMap indexingMap = transferOp.getMatchingIndexingMap(&operandToPad);
return computePaddedShape(
- rewriter, cast<TypedValue<RankedTensorType>>(operandToPad.get()),
+ builder, cast<TypedValue<RankedTensorType>>(operandToPad.get()),
indexingMap, loopUpperBounds, options);
}
/// Pad a single operand to `paddedShape` using `paddingValueAttr` as padding
/// Value.
-static Value padOperand(RewriterBase &rewriter, TilingInterface opToPad,
+static Value padOperand(OpBuilder &builder, TilingInterface opToPad,
TypedValue<RankedTensorType> v,
ArrayRef<OpFoldResult> paddedShape,
Attribute paddingValueAttr) {
@@ -232,15 +232,15 @@ static Value padOperand(RewriterBase &rewriter, TilingInterface opToPad,
if (auto complexTy =
dyn_cast<ComplexType>(getElementTypeOrSelf(v.getType()))) {
if (auto complexAttr = dyn_cast<ArrayAttr>(paddingValueAttr)) {
- paddingValue = complex::ConstantOp::create(rewriter, opToPad.getLoc(),
+ paddingValue = complex::ConstantOp::create(builder, opToPad.getLoc(),
complexTy, complexAttr);
}
} else if (isa<ub::PoisonAttr>(paddingValueAttr)) {
- paddingValue = ub::PoisonOp::create(rewriter, opToPad.getLoc(),
+ paddingValue = ub::PoisonOp::create(builder, opToPad.getLoc(),
getElementTypeOrSelf(v.getType()));
} else if (auto typedAttr = dyn_cast<TypedAttr>(paddingValueAttr)) {
paddingValue =
- arith::ConstantOp::create(rewriter, opToPad.getLoc(), typedAttr);
+ arith::ConstantOp::create(builder, opToPad.getLoc(), typedAttr);
}
assert(paddingValue && "failed to create value from padding attribute");
@@ -259,49 +259,48 @@ static Value padOperand(RewriterBase &rewriter, TilingInterface opToPad,
RankedTensorType::get(tensorShape, getElementTypeOrSelf(v));
LLVM_DEBUG(DBGS() << "--SUCCESS, makeComposedPadHighOp with type: "
<< paddedTensorType);
- return makeComposedPadHighOp(rewriter, opToPad.getLoc(), paddedTensorType, v,
+ return makeComposedPadHighOp(builder, opToPad.getLoc(), paddedTensorType, v,
paddingValue, /*nofold=*/false, dynDims);
}
-FailureOr<TilingInterface> linalg::rewriteAsPaddedOp(
- RewriterBase &rewriter, TilingInterface opToPad,
- const PadTilingInterfaceOptions &constOptions,
- SmallVector<tensor::PadOp> &padOps,
+FailureOr<PadTilingInterfaceResult> linalg::rewriteAsPaddedOp(
+ OpBuilder &builder, TilingInterface toPad,
+ PadTilingInterfaceOptions options,
const PadSizeComputationFunction &computePaddingSizeFun) {
- LLVM_DEBUG(DBGS() << "Start rewriteAsPaddedOp : " << opToPad << "\n");
+ LLVM_DEBUG(DBGS() << "Start rewriteAsPaddedOp : " << toPad << "\n");
+ SmallVector<tensor::PadOp> padOps;
+ Location loc = toPad.getLoc();
- Location loc = opToPad.getLoc();
- PadTilingInterfaceOptions options(constOptions);
// Allow inference of pad values if they are not explicitly specified.
// TODO: be mindful about the value depending on the actual operation.
if (options.paddingValues.empty()) {
- SmallVector<Type> types(opToPad->getOperandTypes());
- llvm::append_range(types, opToPad->getResultTypes());
+ SmallVector<Type> types(toPad->getOperandTypes());
+ llvm::append_range(types, toPad->getResultTypes());
for (Type t : types) {
options.paddingValues.push_back(
- rewriter.getZeroAttr(getElementTypeOrSelf(t)));
+ builder.getZeroAttr(getElementTypeOrSelf(t)));
}
}
- if (llvm::any_of(opToPad->getOperands(),
+ if (llvm::any_of(toPad->getOperands(),
[](Value v) { return isa<MemRefType>(v.getType()); })) {
- return rewriter.notifyMatchFailure(opToPad,
- "expected operation on tensors");
+ LLVM_DEBUG(DBGS() << "Not an operation on tensors: FAIL\n");
+ return failure();
}
- OpBuilder::InsertionGuard g(rewriter);
- // Set IP after opToPad because we also take the dims of opToPad's output.
- rewriter.setInsertionPointAfter(opToPad);
+ OpBuilder::InsertionGuard g(builder);
+ // Set IP after toPad because we also take the dims of toPad's output.
+ builder.setInsertionPointAfter(toPad);
// 1. Get the loopUpperBounds from the TilingInterface.
- SmallVector<Range> iterationDomain = opToPad.getIterationDomain(rewriter);
+ SmallVector<Range> iterationDomain = toPad.getIterationDomain(builder);
// 2. For each operand.
SmallVector<Value> newOperands;
- newOperands.reserve(opToPad->getNumOperands());
- for (OpOperand &opOperand : opToPad->getOpOperands()) {
+ newOperands.reserve(toPad->getNumOperands());
+ for (OpOperand &opOperand : toPad->getOpOperands()) {
Value operand = opOperand.get();
- LLVM_DEBUG(DBGS() << "--start padding oprd: " << operand << "\n");
+ LLVM_DEBUG(DBGS() << "--start padding operand: " << operand << "\n");
// 2.a. Skip scalar-like operands.
Type operandType = operand.getType();
@@ -311,30 +310,31 @@ FailureOr<TilingInterface> linalg::rewriteAsPaddedOp(
newOperands.push_back(operand);
continue;
}
+
// 2.a. Compute padded shape.
FailureOr<SmallVector<OpFoldResult>> maybePaddedShape =
- computePaddingSizeFun(rewriter, opOperand, iterationDomain, options);
+ computePaddingSizeFun(builder, opOperand, iterationDomain, options);
if (failed(maybePaddedShape)) {
- return rewriter.notifyMatchFailure(opToPad, "could not pad op");
+ LLVM_DEBUG(DBGS() << "Could not get padded shape of operand: FAIL\n");
+ return failure();
}
// 2.b. Expect proper `paddingValues`.
// TODO: we may want to allow garbage padding in the future, in which case
// we would just not assert.
if (opOperand.getOperandNumber() >= options.paddingValues.size()) {
- return rewriter.notifyMatchFailure(opToPad,
- "--no padding value specified");
+ LLVM_DEBUG(DBGS() << "Too few padding values specified: FAIL\n");
+ return failure();
}
Attribute paddingValueAttr =
options.paddingValues[opOperand.getOperandNumber()];
// 2.c. Perform actual padding.
- Value paddedOperand = padOperand(
- rewriter, opToPad, cast<TypedValue<RankedTensorType>>(operand),
- *maybePaddedShape, paddingValueAttr);
+ Value paddedOperand =
+ padOperand(builder, toPad, cast<TypedValue<RankedTensorType>>(operand),
+ *maybePaddedShape, paddingValueAttr);
LLVM_DEBUG(DBGS() << "--done padding operand: " << paddedOperand << "\n");
- // 2.d. Perform actual padding.
newOperands.push_back(paddedOperand);
if (auto padOp = paddedOperand.getDefiningOp<tensor::PadOp>())
padOps.push_back(padOp);
@@ -342,38 +342,34 @@ FailureOr<TilingInterface> linalg::rewriteAsPaddedOp(
// 3. Form the resulting tensor::ExtractSliceOp.
ReifiedRankedShapedTypeDims reifiedResultShapes;
- if (failed(reifyResultShapes(rewriter, opToPad, reifiedResultShapes))) {
- LLVM_DEBUG(DBGS() << "--failed to reify result shapes -> FAIL\n");
- return rewriter.notifyMatchFailure(opToPad,
- "failed to reify result shapes");
+ if (failed(reifyResultShapes(builder, toPad, reifiedResultShapes))) {
+ LLVM_DEBUG(DBGS() << "Failed to reify result shapes: FAIL\n");
+ return failure();
}
- assert(reifiedResultShapes.size() == opToPad->getNumResults() &&
+ assert(reifiedResultShapes.size() == toPad->getNumResults() &&
"expected same number of results");
- // Clone `opToPad` to operate on the statically padded shapes.
+ // Clone `toPad` to operate on the statically padded shapes.
auto resultTensorTypes =
- ValueRange(newOperands).take_back(opToPad->getNumResults()).getTypes();
- // clone **should** properly notify the rewriter.
+ ValueRange(newOperands).take_back(toPad->getNumResults()).getTypes();
+ // clone **should** properly notify the builder.
TilingInterface paddedOp =
- clone(rewriter, opToPad, resultTensorTypes, newOperands);
+ clone(builder, toPad, resultTensorTypes, newOperands);
LLVM_DEBUG(DBGS() << "--cloned padded op: " << paddedOp << "\n");
- // Recover the slice out of the new static results. This keeps the original
- // opToPad around because it uses the dims of the original results.
+ // Recover the slice out of the new static results.
SmallVector<Value> paddedSubtensorResults;
- paddedSubtensorResults.reserve(opToPad->getNumResults());
+ paddedSubtensorResults.reserve(toPad->getNumResults());
for (const auto &en : llvm::enumerate(paddedOp->getResults())) {
Value paddedResult = en.value();
int64_t resultNumber = en.index();
int64_t rank = cast<RankedTensorType>(paddedResult.getType()).getRank();
- SmallVector<OpFoldResult> offsets(rank, rewriter.getIndexAttr(0));
- SmallVector<OpFoldResult> strides(rank, rewriter.getIndexAttr(1));
+ SmallVector<OpFoldResult> offsets(rank, builder.getIndexAttr(0));
+ SmallVector<OpFoldResult> strides(rank, builder.getIndexAttr(1));
paddedSubtensorResults.push_back(tensor::ExtractSliceOp::create(
- rewriter, loc, paddedResult, offsets, reifiedResultShapes[resultNumber],
+ builder, loc, paddedResult, offsets, reifiedResultShapes[resultNumber],
strides));
}
- rewriter.replaceOp(opToPad, paddedSubtensorResults);
-
- return paddedOp;
+ return PadTilingInterfaceResult{padOps, paddedOp, paddedSubtensorResults};
}
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 507597b..94947b7 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -2158,11 +2158,45 @@ public:
return success();
}
};
+
+struct ReinterpretCastOpConstantFolder
+ : public OpRewritePattern<ReinterpretCastOp> {
+public:
+ using OpRewritePattern<ReinterpretCastOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(ReinterpretCastOp op,
+ PatternRewriter &rewriter) const override {
+ unsigned srcStaticCount = llvm::count_if(
+ llvm::concat<OpFoldResult>(op.getMixedOffsets(), op.getMixedSizes(),
+ op.getMixedStrides()),
+ [](OpFoldResult ofr) { return isa<Attribute>(ofr); });
+
+ SmallVector<OpFoldResult> offsets = {op.getConstifiedMixedOffset()};
+ SmallVector<OpFoldResult> sizes = op.getConstifiedMixedSizes();
+ SmallVector<OpFoldResult> strides = op.getConstifiedMixedStrides();
+
+ // TODO: Using counting comparison instead of direct comparison because
+ // getMixedValues (and therefore ReinterpretCastOp::getMixed...) returns
+ // IntegerAttrs, while constifyIndexValues (and therefore
+ // ReinterpretCastOp::getConstifiedMixed...) returns IndexAttrs.
+ if (srcStaticCount ==
+ llvm::count_if(llvm::concat<OpFoldResult>(offsets, sizes, strides),
+ [](OpFoldResult ofr) { return isa<Attribute>(ofr); }))
+ return failure();
+
+ auto newReinterpretCast = ReinterpretCastOp::create(
+ rewriter, op->getLoc(), op.getSource(), offsets[0], sizes, strides);
+
+ rewriter.replaceOpWithNewOp<CastOp>(op, op.getType(), newReinterpretCast);
+ return success();
+ }
+};
} // namespace
void ReinterpretCastOp::getCanonicalizationPatterns(RewritePatternSet &results,
MLIRContext *context) {
- results.add<ReinterpretCastOpExtractStridedMetadataFolder>(context);
+ results.add<ReinterpretCastOpExtractStridedMetadataFolder,
+ ReinterpretCastOpConstantFolder>(context);
}
FailureOr<std::optional<SmallVector<Value>>>
diff --git a/mlir/lib/Dialect/MemRef/Transforms/EmulateWideInt.cpp b/mlir/lib/Dialect/MemRef/Transforms/EmulateWideInt.cpp
index 49b7162..6f815ae 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/EmulateWideInt.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/EmulateWideInt.cpp
@@ -121,7 +121,7 @@ struct EmulateWideIntPass final
[&typeConverter](Operation *op) { return typeConverter.isLegal(op); });
RewritePatternSet patterns(ctx);
- // Add common pattenrs to support contants, functions, etc.
+ // Add common patterns to support constants, functions, etc.
arith::populateArithWideIntEmulationPatterns(typeConverter, patterns);
memref::populateMemRefWideIntEmulationPatterns(typeConverter, patterns);
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 642ced9..dcfe2c7 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -40,6 +40,16 @@ static bool isScalarLikeType(Type type) {
return type.isIntOrIndexOrFloat() || isa<ComplexType>(type);
}
+/// Helper function to attach the `VarName` attribute to an operation
+/// if a variable name is provided.
+static void attachVarNameAttr(Operation *op, OpBuilder &builder,
+ StringRef varName) {
+ if (!varName.empty()) {
+ auto varNameAttr = acc::VarNameAttr::get(builder.getContext(), varName);
+ op->setAttr(acc::getVarNameAttrName(), varNameAttr);
+ }
+}
+
struct MemRefPointerLikeModel
: public PointerLikeType::ExternalModel<MemRefPointerLikeModel,
MemRefType> {
@@ -83,7 +93,9 @@ struct MemRefPointerLikeModel
// then we can generate an alloca operation.
if (memrefTy.hasStaticShape()) {
needsFree = false; // alloca doesn't need deallocation
- return memref::AllocaOp::create(builder, loc, memrefTy).getResult();
+ auto allocaOp = memref::AllocaOp::create(builder, loc, memrefTy);
+ attachVarNameAttr(allocaOp, builder, varName);
+ return allocaOp.getResult();
}
// For dynamic memrefs, extract sizes from the original variable if
@@ -103,8 +115,10 @@ struct MemRefPointerLikeModel
// Static dimensions are handled automatically by AllocOp
}
needsFree = true; // alloc needs deallocation
- return memref::AllocOp::create(builder, loc, memrefTy, dynamicSizes)
- .getResult();
+ auto allocOp =
+ memref::AllocOp::create(builder, loc, memrefTy, dynamicSizes);
+ attachVarNameAttr(allocOp, builder, varName);
+ return allocOp.getResult();
}
// TODO: Unranked not yet supported.
@@ -1016,12 +1030,12 @@ struct RemoveConstantIfConditionWithRegion : public OpRewritePattern<OpTy> {
//===----------------------------------------------------------------------===//
/// Create and populate an init region for privatization recipes.
-/// Returns the init block on success, or nullptr on failure.
+/// Returns success if the region is populated, failure otherwise.
/// Sets needsFree to indicate if the allocated memory requires deallocation.
-static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc,
- Type varType, StringRef varName,
- ValueRange bounds,
- bool &needsFree) {
+static LogicalResult createInitRegion(OpBuilder &builder, Location loc,
+ Region &initRegion, Type varType,
+ StringRef varName, ValueRange bounds,
+ bool &needsFree) {
// Create init block with arguments: original value + bounds
SmallVector<Type> argTypes{varType};
SmallVector<Location> argLocs{loc};
@@ -1030,9 +1044,9 @@ static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc,
argLocs.push_back(loc);
}
- auto initBlock = std::make_unique<Block>();
+ Block *initBlock = builder.createBlock(&initRegion);
initBlock->addArguments(argTypes, argLocs);
- builder.setInsertionPointToStart(initBlock.get());
+ builder.setInsertionPointToStart(initBlock);
Value privatizedValue;
@@ -1046,7 +1060,7 @@ static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc,
privatizedValue = mappableTy.generatePrivateInit(
builder, loc, typedVar, varName, bounds, {}, needsFree);
if (!privatizedValue)
- return nullptr;
+ return failure();
} else {
assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
auto pointerLikeTy = cast<PointerLikeType>(varType);
@@ -1054,21 +1068,21 @@ static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc,
privatizedValue = pointerLikeTy.genAllocate(builder, loc, varName, varType,
blockArgVar, needsFree);
if (!privatizedValue)
- return nullptr;
+ return failure();
}
// Add yield operation to init block
acc::YieldOp::create(builder, loc, privatizedValue);
- return initBlock;
+ return success();
}
/// Create and populate a copy region for firstprivate recipes.
-/// Returns the copy block on success, or nullptr on failure.
+/// Returns success if the region is populated, failure otherwise.
/// TODO: Handle MappableType - it does not yet have a copy API.
-static std::unique_ptr<Block> createCopyRegion(OpBuilder &builder, Location loc,
- Type varType,
- ValueRange bounds) {
+static LogicalResult createCopyRegion(OpBuilder &builder, Location loc,
+ Region &copyRegion, Type varType,
+ ValueRange bounds) {
// Create copy block with arguments: original value + privatized value +
// bounds
SmallVector<Type> copyArgTypes{varType, varType};
@@ -1078,16 +1092,16 @@ static std::unique_ptr<Block> createCopyRegion(OpBuilder &builder, Location loc,
copyArgLocs.push_back(loc);
}
- auto copyBlock = std::make_unique<Block>();
+ Block *copyBlock = builder.createBlock(&copyRegion);
copyBlock->addArguments(copyArgTypes, copyArgLocs);
- builder.setInsertionPointToStart(copyBlock.get());
+ builder.setInsertionPointToStart(copyBlock);
bool isMappable = isa<MappableType>(varType);
bool isPointerLike = isa<PointerLikeType>(varType);
// TODO: Handle MappableType - it does not yet have a copy API.
// Otherwise, for now just fallback to pointer-like behavior.
if (isMappable && !isPointerLike)
- return nullptr;
+ return failure();
// Generate copy region body based on variable type
if (isPointerLike) {
@@ -1099,21 +1113,20 @@ static std::unique_ptr<Block> createCopyRegion(OpBuilder &builder, Location loc,
if (!pointerLikeTy.genCopy(
builder, loc, cast<TypedValue<PointerLikeType>>(privatizedArg),
cast<TypedValue<PointerLikeType>>(originalArg), varType))
- return nullptr;
+ return failure();
}
// Add terminator to copy block
acc::TerminatorOp::create(builder, loc);
- return copyBlock;
+ return success();
}
/// Create and populate a destroy region for privatization recipes.
-/// Returns the destroy block on success, or nullptr if not needed.
-static std::unique_ptr<Block> createDestroyRegion(OpBuilder &builder,
- Location loc, Type varType,
- Value allocRes,
- ValueRange bounds) {
+/// Returns success if the region is populated, failure otherwise.
+static LogicalResult createDestroyRegion(OpBuilder &builder, Location loc,
+ Region &destroyRegion, Type varType,
+ Value allocRes, ValueRange bounds) {
// Create destroy block with arguments: original value + privatized value +
// bounds
SmallVector<Type> destroyArgTypes{varType, varType};
@@ -1123,28 +1136,25 @@ static std::unique_ptr<Block> createDestroyRegion(OpBuilder &builder,
destroyArgLocs.push_back(loc);
}
- auto destroyBlock = std::make_unique<Block>();
+ Block *destroyBlock = builder.createBlock(&destroyRegion);
destroyBlock->addArguments(destroyArgTypes, destroyArgLocs);
- builder.setInsertionPointToStart(destroyBlock.get());
+ builder.setInsertionPointToStart(destroyBlock);
- bool isMappable = isa<MappableType>(varType);
- bool isPointerLike = isa<PointerLikeType>(varType);
- // TODO: Handle MappableType - it does not yet have a deallocation API.
- // Otherwise, for now just fallback to pointer-like behavior.
- if (isMappable && !isPointerLike)
- return nullptr;
-
- assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
- auto pointerLikeTy = cast<PointerLikeType>(varType);
- auto privatizedArg =
+ auto varToFree =
cast<TypedValue<PointerLikeType>>(destroyBlock->getArgument(1));
- // Pass allocRes to help determine the allocation type
- if (!pointerLikeTy.genFree(builder, loc, privatizedArg, allocRes, varType))
- return nullptr;
+ if (isa<MappableType>(varType)) {
+ auto mappableTy = cast<MappableType>(varType);
+ if (!mappableTy.generatePrivateDestroy(builder, loc, varToFree))
+ return failure();
+ } else {
+ assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
+ auto pointerLikeTy = cast<PointerLikeType>(varType);
+ if (!pointerLikeTy.genFree(builder, loc, varToFree, allocRes, varType))
+ return failure();
+ }
acc::TerminatorOp::create(builder, loc);
-
- return destroyBlock;
+ return success();
}
} // namespace
@@ -1206,40 +1216,33 @@ PrivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
if (!isMappable && !isPointerLike)
return std::nullopt;
- // Create init and destroy blocks using shared helpers
OpBuilder::InsertionGuard guard(builder);
- // Save the original insertion point for creating the recipe operation later
- auto originalInsertionPoint = builder.saveInsertionPoint();
+ // Create the recipe operation first so regions have proper parent context
+ auto recipe = PrivateRecipeOp::create(builder, loc, recipeName, varType);
+ // Populate the init region
bool needsFree = false;
- auto initBlock =
- createInitRegion(builder, loc, varType, varName, bounds, needsFree);
- if (!initBlock)
+ if (failed(createInitRegion(builder, loc, recipe.getInitRegion(), varType,
+ varName, bounds, needsFree))) {
+ recipe.erase();
return std::nullopt;
+ }
// Only create destroy region if the allocation needs deallocation
- std::unique_ptr<Block> destroyBlock;
if (needsFree) {
// Extract the allocated value from the init block's yield operation
- auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator());
+ auto yieldOp =
+ cast<acc::YieldOp>(recipe.getInitRegion().front().getTerminator());
Value allocRes = yieldOp.getOperand(0);
- destroyBlock = createDestroyRegion(builder, loc, varType, allocRes, bounds);
- if (!destroyBlock)
+ if (failed(createDestroyRegion(builder, loc, recipe.getDestroyRegion(),
+ varType, allocRes, bounds))) {
+ recipe.erase();
return std::nullopt;
+ }
}
- // Now create the recipe operation at the original insertion point and attach
- // the blocks
- builder.restoreInsertionPoint(originalInsertionPoint);
- auto recipe = PrivateRecipeOp::create(builder, loc, recipeName, varType);
-
- // Move the blocks into the recipe's regions
- recipe.getInitRegion().push_back(initBlock.release());
- if (destroyBlock)
- recipe.getDestroyRegion().push_back(destroyBlock.release());
-
return recipe;
}
@@ -1285,45 +1288,40 @@ FirstprivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
if (!isMappable && !isPointerLike)
return std::nullopt;
- // Create init, copy, and destroy blocks using shared helpers
OpBuilder::InsertionGuard guard(builder);
- // Save the original insertion point for creating the recipe operation later
- auto originalInsertionPoint = builder.saveInsertionPoint();
+ // Create the recipe operation first so regions have proper parent context
+ auto recipe = FirstprivateRecipeOp::create(builder, loc, recipeName, varType);
+ // Populate the init region
bool needsFree = false;
- auto initBlock =
- createInitRegion(builder, loc, varType, varName, bounds, needsFree);
- if (!initBlock)
+ if (failed(createInitRegion(builder, loc, recipe.getInitRegion(), varType,
+ varName, bounds, needsFree))) {
+ recipe.erase();
return std::nullopt;
+ }
- auto copyBlock = createCopyRegion(builder, loc, varType, bounds);
- if (!copyBlock)
+ // Populate the copy region
+ if (failed(createCopyRegion(builder, loc, recipe.getCopyRegion(), varType,
+ bounds))) {
+ recipe.erase();
return std::nullopt;
+ }
// Only create destroy region if the allocation needs deallocation
- std::unique_ptr<Block> destroyBlock;
if (needsFree) {
// Extract the allocated value from the init block's yield operation
- auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator());
+ auto yieldOp =
+ cast<acc::YieldOp>(recipe.getInitRegion().front().getTerminator());
Value allocRes = yieldOp.getOperand(0);
- destroyBlock = createDestroyRegion(builder, loc, varType, allocRes, bounds);
- if (!destroyBlock)
+ if (failed(createDestroyRegion(builder, loc, recipe.getDestroyRegion(),
+ varType, allocRes, bounds))) {
+ recipe.erase();
return std::nullopt;
+ }
}
- // Now create the recipe operation at the original insertion point and attach
- // the blocks
- builder.restoreInsertionPoint(originalInsertionPoint);
- auto recipe = FirstprivateRecipeOp::create(builder, loc, recipeName, varType);
-
- // Move the blocks into the recipe's regions
- recipe.getInitRegion().push_back(initBlock.release());
- recipe.getCopyRegion().push_back(copyBlock.release());
- if (destroyBlock)
- recipe.getDestroyRegion().push_back(destroyBlock.release());
-
return recipe;
}
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
index 12e6475..7c019e7 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp
@@ -2032,11 +2032,19 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
}
// Newly created `WarpOp` will yield values in following order:
- // 1. All init args of the `ForOp`.
- // 2. All escaping values.
- // 3. All non-`ForOp` yielded values.
+ // 1. Loop bounds.
+ // 2. All init args of the `ForOp`.
+ // 3. All escaping values.
+ // 4. All non-`ForOp` yielded values.
SmallVector<Value> newWarpOpYieldValues;
SmallVector<Type> newWarpOpDistTypes;
+ newWarpOpYieldValues.insert(
+ newWarpOpYieldValues.end(),
+ {forOp.getLowerBound(), forOp.getUpperBound(), forOp.getStep()});
+ newWarpOpDistTypes.insert(newWarpOpDistTypes.end(),
+ {forOp.getLowerBound().getType(),
+ forOp.getUpperBound().getType(),
+ forOp.getStep().getType()});
for (auto [i, initArg] : llvm::enumerate(forOp.getInitArgs())) {
newWarpOpYieldValues.push_back(initArg);
// Compute the distributed type for this init arg.
@@ -2072,20 +2080,24 @@ struct WarpOpScfForOp : public WarpDistributionPattern {
// Next, we create a new `ForOp` with the init args yielded by the new
// `WarpOp`.
+ const unsigned initArgsStartIdx = 3; // After loop bounds.
const unsigned escapingValuesStartIdx =
+ initArgsStartIdx +
forOp.getInitArgs().size(); // `ForOp` init args are positioned before
// escaping values in the new `WarpOp`.
SmallVector<Value> newForOpOperands;
- for (size_t i = 0; i < escapingValuesStartIdx; ++i)
+ for (size_t i = initArgsStartIdx; i < escapingValuesStartIdx; ++i)
newForOpOperands.push_back(newWarpOp.getResult(newIndices[i]));
// Create a new `ForOp` outside the new `WarpOp` region.
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPointAfter(newWarpOp);
auto newForOp = scf::ForOp::create(
- rewriter, forOp.getLoc(), forOp.getLowerBound(), forOp.getUpperBound(),
- forOp.getStep(), newForOpOperands, /*bodyBuilder=*/nullptr,
- forOp.getUnsignedCmp());
+ rewriter, forOp.getLoc(),
+        /*lowerBound=*/newWarpOp.getResult(newIndices[0]),
+        /*upperBound=*/newWarpOp.getResult(newIndices[1]),
+        /*step=*/newWarpOp.getResult(newIndices[2]), newForOpOperands,
+ /*bodyBuilder=*/nullptr, forOp.getUnsignedCmp());
// Next, we insert a new `WarpOp` (called inner `WarpOp`) inside the
// newly created `ForOp`. This `WarpOp` will contain all ops that were
// contained within the original `ForOp` body.
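
For context, a rough sketch of the structure this change produces (warp size,
shapes, and value names are assumed for illustration): the outer warp op now
also yields the loop bounds, and the `scf.for` rebuilt after it consumes them
as results of that warp op.

```mlir
%r:4 = gpu.warp_execute_on_lane_0(%laneid)[32]
    -> (index, index, index, vector<4xf32>) {
  // ... ops computing %lb, %ub, %step and the loop init value ...
  gpu.yield %lb, %ub, %step, %init : index, index, index, vector<128xf32>
}
%f = scf.for %iv = %r#0 to %r#1 step %r#2 iter_args(%acc = %r#3)
    -> (vector<4xf32>) {
  // ... inner warp op wrapping the original loop body ...
  scf.yield %next : vector<4xf32>
}
```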
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
index 1b656d8..ea93085 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp
@@ -817,6 +817,50 @@ struct LinearizeVectorToElements final
}
};
+/// Convert broadcasts from scalars or 1-element vectors, such as
+///
+/// ```mlir
+/// vector.broadcast %value : f32 to vector<4x4xf32>
+/// ```
+///
+/// to broadcasts to rank-1 vectors, with shape_casts before/after as needed.
+/// The above becomes,
+///
+/// ```mlir
+/// %out_1d = vector.broadcast %value : f32 to vector<16xf32>
+/// %out_nd = vector.shape_cast %out_1d : vector<16xf32> to vector<4x4xf32>
+/// ```
+struct LinearizeVectorBroadcast final
+ : public OpConversionPattern<vector::BroadcastOp> {
+ using Base::Base;
+
+ LinearizeVectorBroadcast(const TypeConverter &typeConverter,
+ MLIRContext *context, PatternBenefit benefit = 1)
+ : OpConversionPattern(typeConverter, context, benefit) {}
+
+ LogicalResult
+ matchAndRewrite(vector::BroadcastOp broadcastOp, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+
+ int numElements = 1;
+ Type sourceType = broadcastOp.getSourceType();
+ if (auto vecType = dyn_cast<VectorType>(sourceType)) {
+ numElements = vecType.getNumElements();
+ }
+
+ if (numElements != 1) {
+ return rewriter.notifyMatchFailure(
+ broadcastOp, "only broadcasts of single elements can be linearized.");
+ }
+
+ auto dstTy = getTypeConverter()->convertType(broadcastOp.getType());
+ rewriter.replaceOpWithNewOp<vector::BroadcastOp>(broadcastOp, dstTy,
+ adaptor.getSource());
+
+ return success();
+ }
+};
+
} // namespace
/// This method defines the set of operations that are linearizable, and hence
@@ -909,8 +953,8 @@ void mlir::vector::populateVectorLinearizeBasePatterns(
patterns
.add<LinearizeConstantLike, LinearizeVectorizable, LinearizeVectorBitCast,
LinearizeVectorCreateMask, LinearizeVectorLoad, LinearizeVectorStore,
- LinearizeVectorFromElements, LinearizeVectorToElements>(
- typeConverter, patterns.getContext());
+ LinearizeVectorBroadcast, LinearizeVectorFromElements,
+ LinearizeVectorToElements>(typeConverter, patterns.getContext());
}
void mlir::vector::populateVectorLinearizeShuffleLikeOpsPatterns(
diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
index 025ee9a..c809c502 100644
--- a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
+++ b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
@@ -91,7 +91,7 @@ mlir::vector::isTranspose2DSlice(vector::TransposeOp op) {
// Check whether the two source vector dimensions that are greater than one
// must be transposed with each other so that we can apply one of the 2-D
- // transpose pattens. Otherwise, these patterns are not applicable.
+ // transpose patterns. Otherwise, these patterns are not applicable.
if (!areDimsTransposedIn2DSlice(srcGtOneDims[0], srcGtOneDims[1],
op.getPermutation()))
return failure();
diff --git a/mlir/lib/Dialect/WasmSSA/IR/WasmSSAOps.cpp b/mlir/lib/Dialect/WasmSSA/IR/WasmSSAOps.cpp
index 89b62a2..a514ea9 100644
--- a/mlir/lib/Dialect/WasmSSA/IR/WasmSSAOps.cpp
+++ b/mlir/lib/Dialect/WasmSSA/IR/WasmSSAOps.cpp
@@ -12,6 +12,7 @@
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/Region.h"
#include "mlir/IR/SymbolTable.h"
@@ -39,28 +40,6 @@ void printElseRegion(OpAsmPrinter &opPrinter, Operation *op,
opPrinter.printKeywordOrString("else ");
opPrinter.printRegion(elseRegion);
}
-
-ParseResult parseWasmVisibility(OpAsmParser &opParser, StringAttr &visibility) {
- std::string keyword;
- auto initLocation = opParser.getCurrentLocation();
- std::ignore = opParser.parseOptionalKeywordOrString(&keyword);
- if (keyword == "nested" or keyword == "") {
- visibility = StringAttr::get(opParser.getContext(), "nested");
- return ParseResult::success();
- }
-
- if (keyword == "public" || keyword == "private") {
- visibility = StringAttr::get(opParser.getContext(), keyword);
- return ParseResult::success();
- }
- opParser.emitError(initLocation, "expecting symbol visibility");
- return ParseResult::failure();
-}
-
-void printWasmVisibility(OpAsmPrinter &opPrinter, Operation *op,
- Attribute visibility) {
- opPrinter.printKeywordOrString(cast<StringAttr>(visibility).strref());
-}
} // namespace
#define GET_OP_CLASSES
@@ -167,10 +146,23 @@ Block *FuncOp::addEntryBlock() {
void FuncOp::build(OpBuilder &odsBuilder, OperationState &odsState,
StringRef symbol, FunctionType funcType) {
- FuncOp::build(odsBuilder, odsState, symbol, funcType, {}, {}, "nested");
+ FuncOp::build(odsBuilder, odsState, symbol, funcType, {}, {});
}
ParseResult FuncOp::parse(OpAsmParser &parser, OperationState &result) {
+ auto *ctx = parser.getContext();
+ std::string visibilityString;
+ auto loc = parser.getNameLoc();
+ ParseResult res = parser.parseOptionalKeywordOrString(&visibilityString);
+ bool exported{false};
+ if (res.succeeded()) {
+ if (visibilityString != "exported")
+ return parser.emitError(
+ loc, "expecting either `exported` or symbol name. got ")
+ << visibilityString;
+ exported = true;
+ }
+
auto buildFuncType = [&parser](Builder &builder, ArrayRef<Type> argTypes,
ArrayRef<Type> results,
function_interface_impl::VariadicFlag,
@@ -191,11 +183,13 @@ ParseResult FuncOp::parse(OpAsmParser &parser, OperationState &result) {
return builder.getFunctionType(argTypesWithoutLocal, results);
};
-
- return function_interface_impl::parseFunctionOp(
+ auto funcParseRes = function_interface_impl::parseFunctionOp(
parser, result, /*allowVariadic=*/false,
getFunctionTypeAttrName(result.name), buildFuncType,
getArgAttrsAttrName(result.name), getResAttrsAttrName(result.name));
+ if (exported)
+ result.addAttribute(getExportedAttrName(result.name), UnitAttr::get(ctx));
+ return funcParseRes;
}
LogicalResult FuncOp::verifyBody() {
@@ -224,9 +218,18 @@ LogicalResult FuncOp::verifyBody() {
}
void FuncOp::print(OpAsmPrinter &p) {
+  /// If exported, print the keyword first and temporarily remove the attribute
+  /// so the generic function interface printer does not emit it again.
+ auto exported = getExported();
+ if (exported) {
+ p << " exported";
+ removeExportedAttr();
+ }
function_interface_impl::printFunctionOp(
p, *this, /*isVariadic=*/false, getFunctionTypeAttrName(),
getArgAttrsAttrName(), getResAttrsAttrName());
+ if (exported)
+ setExported(true);
}
//===----------------------------------------------------------------------===//
@@ -237,38 +240,37 @@ void FuncImportOp::build(OpBuilder &odsBuilder, OperationState &odsState,
StringRef symbol, StringRef moduleName,
StringRef importName, FunctionType type) {
FuncImportOp::build(odsBuilder, odsState, symbol, moduleName, importName,
- type, {}, {}, odsBuilder.getStringAttr("nested"));
+ type, {}, {});
}
//===----------------------------------------------------------------------===//
// GlobalOp
//===----------------------------------------------------------------------===//
-
-void GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState,
- StringRef symbol, Type type, bool isMutable) {
- GlobalOp::build(odsBuilder, odsState, symbol, type, isMutable,
- odsBuilder.getStringAttr("nested"));
-}
-
// Custom formats
ParseResult GlobalOp::parse(OpAsmParser &parser, OperationState &result) {
StringAttr symbolName;
Type globalType;
auto *ctx = parser.getContext();
- ParseResult res = parser.parseSymbolName(
- symbolName, SymbolTable::getSymbolAttrName(), result.attributes);
+ std::string visibilityString;
+ auto loc = parser.getNameLoc();
+ ParseResult res = parser.parseOptionalKeywordOrString(&visibilityString);
+ if (res.succeeded()) {
+ if (visibilityString != "exported")
+ return parser.emitError(
+ loc, "expecting either `exported` or symbol name. got ")
+ << visibilityString;
+ result.addAttribute(getExportedAttrName(result.name), UnitAttr::get(ctx));
+ }
+ res = parser.parseSymbolName(symbolName, SymbolTable::getSymbolAttrName(),
+ result.attributes);
res = parser.parseType(globalType);
result.addAttribute(getTypeAttrName(result.name), TypeAttr::get(globalType));
std::string mutableString;
res = parser.parseOptionalKeywordOrString(&mutableString);
if (res.succeeded() && mutableString == "mutable")
result.addAttribute("isMutable", UnitAttr::get(ctx));
- std::string visibilityString;
- res = parser.parseOptionalKeywordOrString(&visibilityString);
- if (res.succeeded())
- result.addAttribute("sym_visibility",
- StringAttr::get(ctx, visibilityString));
+
res = parser.parseColon();
Region *globalInitRegion = result.addRegion();
res = parser.parseRegion(*globalInitRegion);
@@ -276,11 +278,11 @@ ParseResult GlobalOp::parse(OpAsmParser &parser, OperationState &result) {
}
void GlobalOp::print(OpAsmPrinter &printer) {
+ if (getExported())
+ printer << " exported";
printer << " @" << getSymName().str() << " " << getType();
if (getIsMutable())
printer << " mutable";
- if (auto vis = getSymVisibility())
- printer << " " << *vis;
printer << " :";
Region &body = getRegion();
if (!body.empty()) {
@@ -319,13 +321,6 @@ GlobalGetOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
// GlobalImportOp
//===----------------------------------------------------------------------===//
-void GlobalImportOp::build(OpBuilder &odsBuilder, OperationState &odsState,
- StringRef symbol, StringRef moduleName,
- StringRef importName, Type type, bool isMutable) {
- GlobalImportOp::build(odsBuilder, odsState, symbol, moduleName, importName,
- type, isMutable, odsBuilder.getStringAttr("nested"));
-}
-
ParseResult GlobalImportOp::parse(OpAsmParser &parser, OperationState &result) {
auto *ctx = parser.getContext();
ParseResult res = parseImportOp(parser, result);
@@ -335,12 +330,8 @@ ParseResult GlobalImportOp::parse(OpAsmParser &parser, OperationState &result) {
res = parser.parseOptionalKeywordOrString(&mutableOrSymVisString);
if (res.succeeded() && mutableOrSymVisString == "mutable") {
result.addAttribute("isMutable", UnitAttr::get(ctx));
- res = parser.parseOptionalKeywordOrString(&mutableOrSymVisString);
}
- if (res.succeeded())
- result.addAttribute("sym_visibility",
- StringAttr::get(ctx, mutableOrSymVisString));
res = parser.parseColon();
Type importedType;
@@ -356,8 +347,6 @@ void GlobalImportOp::print(OpAsmPrinter &printer) {
<< "\" as @" << getSymName();
if (getIsMutable())
printer << " mutable";
- if (auto vis = getSymVisibility())
- printer << " " << *vis;
printer << " : " << getType();
}
@@ -431,27 +420,6 @@ LogicalResult LocalTeeOp::verify() {
Block *LoopOp::getLabelTarget() { return &getBody().front(); }
//===----------------------------------------------------------------------===//
-// MemOp
-//===----------------------------------------------------------------------===//
-
-void MemOp::build(OpBuilder &odsBuilder, OperationState &odsState,
- StringRef symbol, LimitType limit) {
- MemOp::build(odsBuilder, odsState, symbol, limit,
- odsBuilder.getStringAttr("nested"));
-}
-
-//===----------------------------------------------------------------------===//
-// MemImportOp
-//===----------------------------------------------------------------------===//
-
-void MemImportOp::build(OpBuilder &odsBuilder, OperationState &odsState,
- StringRef symbol, StringRef moduleName,
- StringRef importName, LimitType limits) {
- MemImportOp::build(odsBuilder, odsState, symbol, moduleName, importName,
- limits, odsBuilder.getStringAttr("nested"));
-}
-
-//===----------------------------------------------------------------------===//
// ReinterpretOp
//===----------------------------------------------------------------------===//
@@ -471,24 +439,3 @@ LogicalResult ReinterpretOp::verify() {
//===----------------------------------------------------------------------===//
void ReturnOp::build(OpBuilder &odsBuilder, OperationState &odsState) {}
-
-//===----------------------------------------------------------------------===//
-// TableOp
-//===----------------------------------------------------------------------===//
-
-void TableOp::build(OpBuilder &odsBuilder, OperationState &odsState,
- StringRef symbol, TableType type) {
- TableOp::build(odsBuilder, odsState, symbol, type,
- odsBuilder.getStringAttr("nested"));
-}
-
-//===----------------------------------------------------------------------===//
-// TableImportOp
-//===----------------------------------------------------------------------===//
-
-void TableImportOp::build(OpBuilder &odsBuilder, OperationState &odsState,
- StringRef symbol, StringRef moduleName,
- StringRef importName, TableType type) {
- TableImportOp::build(odsBuilder, odsState, symbol, moduleName, importName,
- type, odsBuilder.getStringAttr("nested"));
-}
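
For orientation, a sketch of the surface syntax this parser/printer change
moves to (the signatures and types below are made up; only the placement of
the `exported` keyword is the point):

```mlir
wasmssa.func exported @my_func() -> i32 {
  // ...
}

wasmssa.global exported @my_global i32 mutable : {
  // initializer region
}
```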
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 9beb22d..1599ae9 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -727,6 +727,152 @@ void MemLayoutAttr::print(AsmPrinter &printer) const {
}
printer << ">";
}
+// A helper utility to perform a binary operation on OpFoldResults. The
+// operands are materialized as index values (creating a constant op for an
+// operand that is an attribute), and the result of the corresponding arith
+// op is returned.
+template <typename ArithOp>
+OpFoldResult genBinOp(OpFoldResult a, OpFoldResult b, Location loc,
+ OpBuilder &builder) {
+ auto aVal = getValueOrCreateConstantIndexOp(builder, loc, a);
+ auto bVal = getValueOrCreateConstantIndexOp(builder, loc, b);
+ return builder.create<ArithOp>(loc, aVal, bVal).getResult();
+}
+
+// a helper utility to perform a division operation on an OpFoldResult and an int64_t.
+#define div(a, b) \
+ genBinOp<arith::DivSIOp>(a, builder.getIndexAttr(b), loc, builder)
+
+// a helper utility to perform a remainder operation on an OpFoldResult and an int64_t.
+#define rem(a, b) \
+ genBinOp<arith::RemSIOp>(a, builder.getIndexAttr(b), loc, builder)
+
+// a helper utility to perform a multiplication operation on an OpFoldResult and an int64_t.
+#define mul(a, b) \
+ genBinOp<arith::MulIOp>(a, builder.getIndexAttr(b), loc, builder)
+
+// a helper utility to perform an addition operation on two OpFoldResults.
+#define add(a, b) genBinOp<arith::AddIOp>(a, b, loc, builder)
+
+// block the given offsets according to the block shape
+// say the original offset is [y, x], and the block shape is [By, Bx],
+// then the blocked offset is [y/By, x/Bx, y%By, x%Bx]
+SmallVector<OpFoldResult> getBlockedOffsets(OpBuilder &builder, Location loc,
+ ArrayRef<OpFoldResult> offsets,
+ ArrayRef<int64_t> blockShape) {
+
+ assert(offsets.size() == blockShape.size() &&
+ "offsets and blockShape must have the same size");
+ SmallVector<OpFoldResult> blockedOffsets;
+ SmallVector<OpFoldResult> divs, rems;
+
+ for (auto [offset, block] : llvm::zip(offsets, blockShape)) {
+ divs.push_back(div(offset, block));
+ rems.push_back(rem(offset, block));
+ }
+ blockedOffsets.append(divs.begin(), divs.end());
+ blockedOffsets.append(rems.begin(), rems.end());
+
+ return blockedOffsets;
+}
+
+// Get strides as vector of integer for MemDesc.
+SmallVector<int64_t> MemDescType::getStrideShape() {
+
+ SmallVector<int64_t> matrixShape(getShape().begin(), getShape().end());
+
+ ArrayAttr strideAttr = getStrideAttr();
+ SmallVector<int64_t> strides;
+ for (Attribute attr : strideAttr.getValue()) {
+ strides.push_back(cast<IntegerAttr>(attr).getInt());
+ }
+
+ SmallVector<int64_t> innerBlkShape = getBlockShape();
+
+ // get perm from FCD to LCD
+ // perm[i] = the dim with i-th smallest stride
+ SmallVector<int, 4> perm =
+ llvm::to_vector<4>(llvm::seq<int>(0, strides.size()));
+ llvm::sort(perm, [&](int a, int b) { return strides[a] < strides[b]; });
+
+  assert(strides[perm[0]] == 1 && "innermost dim must have stride 1");
+
+ SmallVector<int64_t> innerBlkStride(innerBlkShape.size());
+ innerBlkStride[perm[0]] = 1;
+ for (size_t i = 1; i < perm.size(); ++i)
+ innerBlkStride[perm[i]] =
+ innerBlkStride[perm[i - 1]] * innerBlkShape[perm[i - 1]];
+
+  // Compute the original matrix shape using the stride info, and compute the
+  // number of blocks in each dimension. The shape of the highest dim can't be
+  // derived from the stride info, but that doesn't impact the stride
+  // computation for the blocked layout.
+ SmallVector<int64_t> matrixShapeOrig(matrixShape.size());
+ SmallVector<int64_t> BlkShapeOrig(matrixShape.size());
+ for (size_t i = 0; i < perm.size() - 1; ++i) {
+ matrixShapeOrig[perm[i]] = strides[perm[i + 1]] / strides[perm[i]];
+ BlkShapeOrig[perm[i]] = matrixShapeOrig[perm[i]] / innerBlkShape[perm[i]];
+ }
+
+ int64_t innerBlkSize = 1;
+ for (auto s : innerBlkShape)
+ innerBlkSize *= s;
+
+ SmallVector<int64_t> outerBlkStride(matrixShape.size());
+ outerBlkStride[perm[0]] = innerBlkSize;
+ for (size_t i = 0; i < perm.size() - 1; ++i) {
+ outerBlkStride[perm[i + 1]] =
+ outerBlkStride[perm[i]] * BlkShapeOrig[perm[i]];
+ }
+
+ // combine the inner and outer strides
+ SmallVector<int64_t> blockedStrides;
+ blockedStrides.append(outerBlkStride.begin(), outerBlkStride.end());
+ blockedStrides.append(innerBlkStride.begin(), innerBlkStride.end());
+
+ return blockedStrides;
+}
+
+// Calculate the linear offset using the blocked offsets and stride
+Value MemDescType::getLinearOffsets(OpBuilder &builder, Location loc,
+ ArrayRef<OpFoldResult> offsets) {
+
+ SmallVector<int64_t> matrixShape(getShape().begin(), getShape().end());
+ SmallVector<int64_t> blockShape = getBlockShape();
+ SmallVector<int64_t> strides = getStrideShape();
+ SmallVector<OpFoldResult> blockedOffsets;
+
+ // blockshape equal to matrixshape means no blocking
+ if (llvm::equal(blockShape, matrixShape)) {
+ // remove the outer dims from strides
+ strides.erase(strides.begin(), strides.begin() + matrixShape.size());
+ } else {
+ assert(offsets.size() == blockShape.size() &&
+ "offsets and blockShape must have the same size");
+ // say the original offset is [y, x], and the block shape is [By, Bx],
+ // then the blocked offset is [y/By, x/Bx, y%By, x%Bx]
+
+ SmallVector<OpFoldResult> divs, rems;
+
+ for (auto [offset, block] : llvm::zip(offsets, blockShape)) {
+ divs.push_back(div(offset, block));
+ rems.push_back(rem(offset, block));
+ }
+ blockedOffsets.append(divs.begin(), divs.end());
+ blockedOffsets.append(rems.begin(), rems.end());
+ offsets = blockedOffsets;
+ }
+
+ // Start with initial value as matrix descriptor's base offset.
+ Value linearOffset = arith::ConstantIndexOp::create(builder, loc, 0);
+ for (size_t i = 0; i < offsets.size(); ++i) {
+ OpFoldResult mulResult = mul(offsets[i], strides[i]);
+ Value mulVal = getValueOrCreateConstantIndexOp(builder, loc, mulResult);
+ linearOffset = arith::AddIOp::create(builder, loc, mulVal, linearOffset);
+ }
+
+ return linearOffset;
+}
} // namespace xegpu
} // namespace mlir
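
A worked example may make the blocked addressing above concrete (the numbers
are illustrative, not taken from the patch): for a mem_desc of shape 32x32
with block shape [8, 16] and row-major strides [32, 1], the within-block
strides are [16, 1], each block holds 8 * 16 = 128 elements, and there are
32 / 16 = 2 blocks along the minor dimension, so the block-level strides are
[256, 128] and the combined blocked strides are [256, 128, 16, 1]. An offset
[10, 20] is blocked into [10/8, 20/16, 10%8, 20%16] = [1, 1, 2, 4], which
linearizes to 1*256 + 1*128 + 2*16 + 4*1 = 420.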
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
index e0a8ac4..abd12e2 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -173,6 +173,49 @@ isValidGatherScatterBufferParams(Type offsetsTy, Type maskTy,
return success();
}
+LogicalResult
+IsValidMatrixOpParams(VectorType dataTy, MemDescType mdescTy,
+ UnitAttr subgroup_block_io,
+ function_ref<InFlightDiagnostic()> emitError) {
+
+ if (!dataTy) {
+ if (subgroup_block_io)
+ return emitError() << "subgroup_block_io "
+                            "is only allowed when the result is a 1D VectorType.";
+ else
+ return success();
+ }
+
+ if (mdescTy.getRank() != 2)
+ return emitError() << "mem_desc must be 2D.";
+
+ ArrayRef<int64_t> dataShape = dataTy.getShape();
+ ArrayRef<int64_t> mdescShape = mdescTy.getShape();
+
+ if (dataShape.size() == 2) {
+ if (subgroup_block_io)
+ return emitError() << "subgroup_block_io "
+                            "is only allowed when the result is a 1D VectorType.";
+ if (llvm::any_of(llvm::zip_equal(dataShape, mdescShape),
+ [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
+ return emitError() << "data shape must not exceed mem_desc shape.";
+ } else {
+ SmallVector<int64_t> blockShape = mdescTy.getBlockShape();
+ // if the subgroup_block_io attribute is set, mdescTy must have block
+ // attribute
+ if (subgroup_block_io && !blockShape.size())
+ return emitError() << "mem_desc must have block attribute when "
+ "subgroup_block_io is set.";
+ // if the subgroup_block_io attribute is set, the memdesc should be row
+ // major
+ if (subgroup_block_io && mdescTy.isColMajor())
+ return emitError() << "mem_desc should be row major when "
+ "subgroup_block_io is set.";
+ }
+
+ return success();
+}
+
//===----------------------------------------------------------------------===//
// XeGPU_CreateNdDescOp
//===----------------------------------------------------------------------===//
@@ -1049,23 +1092,20 @@ void LoadMatrixOp::build(OpBuilder &builder, OperationState &state, Type res,
llvm::SmallVector<int64_t> staticOffsets;
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
+ // Call the generated builder with all parameters (including optional ones as
+ // nullptr/empty)
build(builder, state, res, memDesc, dynamicOffsets, staticOffsetsAttr,
- layout);
+ /*subgroup_block_io=*/nullptr, layout);
}
LogicalResult LoadMatrixOp::verify() {
- VectorType resTy = getRes().getType();
- MemDescType mdescTy = getMemDesc().getType();
- if (mdescTy.getRank() != 2)
- return emitOpError("mem_desc must be 2D.");
+ auto resTy = dyn_cast<VectorType>(getRes().getType());
+ UnitAttr subgroup_block_io = getSubgroupBlockIoAttr();
+ MemDescType mdescTy = getMemDesc().getType();
- ArrayRef<int64_t> valueShape = resTy.getShape();
- ArrayRef<int64_t> mdescShape = mdescTy.getShape();
- if (llvm::any_of(llvm::zip_equal(valueShape, mdescShape),
- [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
- return emitOpError("result shape must not exceed mem_desc shape.");
- return success();
+ return IsValidMatrixOpParams(resTy, mdescTy, subgroup_block_io,
+ [&]() { return emitError(); });
}
//===----------------------------------------------------------------------===//
@@ -1080,57 +1120,16 @@ void StoreMatrixOp::build(OpBuilder &builder, OperationState &state, Value data,
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
build(builder, state, data, memDesc, dynamicOffsets, staticOffsetsAttr,
- layout);
+ /*subgroup_block_io=*/nullptr, layout);
}
LogicalResult StoreMatrixOp::verify() {
- VectorType dataTy = getData().getType();
- MemDescType mdescTy = getMemDesc().getType();
-
- if (mdescTy.getRank() != 2)
- return emitOpError("mem_desc must be 2D.");
-
- ArrayRef<int64_t> dataShape = dataTy.getShape();
- ArrayRef<int64_t> mdescShape = mdescTy.getShape();
- if (llvm::any_of(llvm::zip_equal(dataShape, mdescShape),
- [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
- return emitOpError("data shape must not exceed mem_desc shape.");
-
- return success();
-}
-
-//===----------------------------------------------------------------------===//
-// XeGPU_MemDescSubviewOp
-//===----------------------------------------------------------------------===//
-
-void MemDescSubviewOp::build(OpBuilder &builder, OperationState &state,
- Type resTy, Value src,
- llvm::ArrayRef<OpFoldResult> offsets) {
- llvm::SmallVector<Value> dynamicOffsets;
- llvm::SmallVector<int64_t> staticOffsets;
- dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
- auto staticOffsetsAttr = builder.getDenseI64ArrayAttr(staticOffsets);
- build(builder, state, resTy, src, dynamicOffsets, staticOffsetsAttr);
-}
-
-LogicalResult MemDescSubviewOp::verify() {
- MemDescType srcTy = getSrc().getType();
- MemDescType resTy = getRes().getType();
- ArrayRef<int64_t> srcShape = srcTy.getShape();
- ArrayRef<int64_t> resShape = resTy.getShape();
- if (srcTy.getRank() < resTy.getRank())
- return emitOpError("result rank must not exceed source rank.");
-
- if (llvm::any_of(
- llvm::zip_equal(resShape, srcShape.take_back(resShape.size())),
- [](auto p) { return std::get<0>(p) > std::get<1>(p); }))
- return emitOpError("result shape must not exceed source shape.");
-
- if (srcTy.getStrides() != resTy.getStrides())
- return emitOpError("result must inherit the source strides.");
-
- return success();
+ auto dataTy = dyn_cast<VectorType>(getData().getType());
+ UnitAttr subgroup_block_io = getSubgroupBlockIoAttr();
+ MemDescType mdescTy = getMemDesc().getType();
+ return IsValidMatrixOpParams(dataTy, mdescTy, subgroup_block_io,
+ [&]() { return emitError(); });
}
namespace mlir {
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
index a178d0f..aafa1b7 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp
@@ -941,7 +941,9 @@ struct UnrollLoadMatrixOp : public UnrollPattern<xegpu::LoadMatrixOp> {
LogicalResult matchAndRewrite(xegpu::LoadMatrixOp op,
PatternRewriter &rewriter) const override {
Location loc = op.getLoc();
- VectorType valueTy = op.getType();
+ VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
+    assert(valueTy && "the value type must be a vector type!");
+
std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
if (!targetShape || targetShape->size() != (size_t)valueTy.getRank())
return failure();
@@ -984,7 +986,8 @@ struct UnrollStoreMatrixOp : public UnrollPattern<xegpu::StoreMatrixOp> {
return failure();
Location loc = op.getLoc();
- VectorType valueTy = op.getData().getType();
+ VectorType valueTy = llvm::dyn_cast<VectorType>(op.getData().getType());
+    assert(valueTy && "the value type must be a vector type!");
ArrayRef<int64_t> shape = valueTy.getShape();
auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index c28d2fc..31a967d 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -991,7 +991,8 @@ struct WgToSgLoadMatrixOp : public OpConversionPattern<xegpu::LoadMatrixOp> {
return failure();
ArrayRef<int64_t> wgShape = op.getDataShape();
- VectorType valueTy = op.getRes().getType();
+ VectorType valueTy = llvm::dyn_cast<VectorType>(op.getRes().getType());
+    assert(valueTy && "the value type must be a vector type!");
Type elemTy = valueTy.getElementType();
xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
diff --git a/mlir/lib/RegisterAllPasses.cpp b/mlir/lib/RegisterAllPasses.cpp
index c67b242..dd413d2de 100644
--- a/mlir/lib/RegisterAllPasses.cpp
+++ b/mlir/lib/RegisterAllPasses.cpp
@@ -98,4 +98,5 @@ void mlir::registerAllPasses() {
sparse_tensor::registerSparseTensorPipelines();
tosa::registerTosaToLinalgPipelines();
gpu::registerGPUToNVVMPipeline();
+ gpu::registerGPUToXeVMPipeline();
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 1e2099d..8de49dd 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -246,7 +246,7 @@ public:
// Rewrite all uses of the original variable in `BBName`
// with the linear variable in-place
- void rewriteInPlace(llvm::IRBuilderBase &builder, std::string BBName,
+ void rewriteInPlace(llvm::IRBuilderBase &builder, const std::string &BBName,
size_t varIndex) {
llvm::SmallVector<llvm::User *> users;
for (llvm::User *user : linearOrigVal[varIndex]->users())
diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
index 0c3e87a..d9ad8fb 100644
--- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
+++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
@@ -2619,6 +2619,11 @@ LogicalResult ControlFlowStructurizer::structurize() {
// region. We cannot handle such cases given that once a value is sinked into
// the SelectionOp/LoopOp's region, there is no escape for it.
for (auto *block : constructBlocks) {
+ if (!block->use_empty())
+ return emitError(block->getParent()->getLoc(),
+ "failed control flow structurization: "
+ "block has uses outside of the "
+ "enclosing selection/loop construct");
for (Operation &op : *block)
if (!op.use_empty())
return op.emitOpError("failed control flow structurization: value has "
diff --git a/mlir/lib/Target/Wasm/TranslateFromWasm.cpp b/mlir/lib/Target/Wasm/TranslateFromWasm.cpp
index 51c6077..366ba8f 100644
--- a/mlir/lib/Target/Wasm/TranslateFromWasm.cpp
+++ b/mlir/lib/Target/Wasm/TranslateFromWasm.cpp
@@ -14,6 +14,7 @@
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Location.h"
#include "mlir/Support/LLVM.h"
@@ -138,6 +139,10 @@ using ImportDesc =
using parsed_inst_t = FailureOr<SmallVector<Value>>;
+struct EmptyBlockMarker {};
+using BlockTypeParseResult =
+ std::variant<EmptyBlockMarker, TypeIdxRecord, Type>;
+
struct WasmModuleSymbolTables {
SmallVector<FunctionSymbolRefContainer> funcSymbols;
SmallVector<GlobalSymbolRefContainer> globalSymbols;
@@ -175,6 +180,9 @@ class ParserHead;
/// Wrapper around SmallVector to only allow access as push and pop on the
/// stack. Makes sure that there are no "free accesses" on the stack to preserve
/// its state.
+/// This class also keeps track of the Wasm labels defined by different ops,
+/// which can be targeted by control flow ops. This can be modeled as part of
+/// the Value Stack as Wasm control flow ops can only target enclosing labels.
class ValueStack {
private:
struct LabelLevel {
@@ -206,6 +214,16 @@ public:
/// if an error occurs.
LogicalResult pushResults(ValueRange results, Location *opLoc);
+ void addLabelLevel(LabelLevelOpInterface levelOp) {
+ labelLevel.push_back({values.size(), levelOp});
+ LDBG() << "Adding a new frame context to ValueStack";
+ }
+
+ void dropLabelLevel() {
+    assert(!labelLevel.empty() && "Trying to drop a frame from an empty context");
+ auto newSize = labelLevel.pop_back_val().stackIdx;
+ values.truncate(newSize);
+ }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// A simple dump function for debugging.
/// Writes output to llvm::dbgs().
@@ -214,6 +232,7 @@ public:
private:
SmallVector<Value> values;
+ SmallVector<LabelLevel> labelLevel;
};
using local_val_t = TypedValue<wasmssa::LocalRefType>;
@@ -248,6 +267,19 @@ private:
buildNumericOp(OpBuilder &builder,
std::enable_if_t<std::is_arithmetic_v<valueType>> * = nullptr);
+  /// Construct a conversion operation of type \p opType that takes a value of
+  /// type \p inputType from the stack and produces a value of type
+ /// \p outputType.
+ ///
+ /// \p opType - The WASM dialect operation to build.
+ /// \p inputType - The operand type for the built instruction.
+ /// \p outputType - The result type for the built instruction.
+ ///
+ /// \returns The parsed instruction result, or failure.
+ template <typename opType, typename inputType, typename outputType,
+ typename... extraArgsT>
+ inline parsed_inst_t buildConvertOp(OpBuilder &builder, extraArgsT...);
+
/// This function generates a dispatch tree to associate an opcode with a
/// parser. Parsers are registered by specialising the
/// `parseSpecificInstruction` function for the op code to handle.
@@ -280,11 +312,105 @@ private:
}
}
+ ///
+ /// RAII guard class for creating a nesting level
+ ///
+ struct NestingContextGuard {
+ NestingContextGuard(ExpressionParser &parser, LabelLevelOpInterface levelOp)
+ : parser{parser} {
+ parser.addNestingContextLevel(levelOp);
+ }
+ NestingContextGuard(NestingContextGuard &&other) : parser{other.parser} {
+ other.shouldDropOnDestruct = false;
+ }
+ NestingContextGuard(NestingContextGuard const &) = delete;
+ ~NestingContextGuard() {
+ if (shouldDropOnDestruct)
+ parser.dropNestingContextLevel();
+ }
+ ExpressionParser &parser;
+ bool shouldDropOnDestruct = true;
+ };
+
+ void addNestingContextLevel(LabelLevelOpInterface levelOp) {
+ valueStack.addLabelLevel(levelOp);
+ }
+
+ void dropNestingContextLevel() {
+    // Should always succeed as we are dropping the frame that was previously
+ // created.
+ valueStack.dropLabelLevel();
+ }
+
+ llvm::FailureOr<FunctionType> getFuncTypeFor(OpBuilder &builder,
+ EmptyBlockMarker) {
+ return builder.getFunctionType({}, {});
+ }
+
+ llvm::FailureOr<FunctionType> getFuncTypeFor(OpBuilder &builder,
+ TypeIdxRecord type) {
+ if (type.id >= symbols.moduleFuncTypes.size())
+ return emitError(*currentOpLoc,
+ "type index references nonexistent type (")
+ << type.id << "). Only " << symbols.moduleFuncTypes.size()
+ << " types are registered";
+ return symbols.moduleFuncTypes[type.id];
+ }
+
+ llvm::FailureOr<FunctionType> getFuncTypeFor(OpBuilder &builder,
+ Type valType) {
+ return builder.getFunctionType({}, {valType});
+ }
+
+ llvm::FailureOr<FunctionType>
+ getFuncTypeFor(OpBuilder &builder, BlockTypeParseResult parseResult) {
+ return std::visit(
+ [this, &builder](auto value) { return getFuncTypeFor(builder, value); },
+ parseResult);
+ }
+
+ llvm::FailureOr<FunctionType>
+ getFuncTypeFor(OpBuilder &builder,
+ llvm::FailureOr<BlockTypeParseResult> parseResult) {
+ if (llvm::failed(parseResult))
+ return failure();
+ return getFuncTypeFor(builder, *parseResult);
+ }
+
+ llvm::FailureOr<FunctionType> parseBlockFuncType(OpBuilder &builder);
+
struct ParseResultWithInfo {
SmallVector<Value> opResults;
std::byte endingByte;
};
+ template <typename FilterT = ByteSequence<WasmBinaryEncoding::endByte>>
+  /// @param blockToFill: the block whose content will be populated
+  /// @param resTypes: the types that this block is supposed to return
+ llvm::FailureOr<std::byte>
+ parseBlockContent(OpBuilder &builder, Block *blockToFill, TypeRange resTypes,
+ Location opLoc, LabelLevelOpInterface levelOp,
+ FilterT parseEndBytes = {}) {
+ OpBuilder::InsertionGuard guard{builder};
+ builder.setInsertionPointToStart(blockToFill);
+ LDBG() << "parsing a block of type "
+ << builder.getFunctionType(blockToFill->getArgumentTypes(),
+ resTypes);
+ auto nC = addNesting(levelOp);
+
+ if (failed(pushResults(blockToFill->getArguments())))
+ return failure();
+ auto bodyParsingRes = parse(builder, parseEndBytes);
+ if (failed(bodyParsingRes))
+ return failure();
+ auto returnOperands = popOperands(resTypes);
+ if (failed(returnOperands))
+ return failure();
+ builder.create<BlockReturnOp>(opLoc, *returnOperands);
+ LDBG() << "end of parsing of a block";
+ return bodyParsingRes->endingByte;
+ }
+
public:
template <std::byte ParseEndByte = WasmBinaryEncoding::endByte>
parsed_inst_t parse(OpBuilder &builder, UniqueByte<ParseEndByte> = {});
@@ -294,7 +420,11 @@ public:
parse(OpBuilder &builder,
ByteSequence<ExpressionParseEnd...> parsingEndFilters);
- FailureOr<SmallVector<Value>> popOperands(TypeRange operandTypes) {
+ NestingContextGuard addNesting(LabelLevelOpInterface levelOp) {
+ return NestingContextGuard{*this, levelOp};
+ }
+
+ FailureOr<llvm::SmallVector<Value>> popOperands(TypeRange operandTypes) {
return valueStack.popOperands(operandTypes, &currentOpLoc.value());
}
@@ -308,6 +438,12 @@ public:
template <typename OpToCreate>
parsed_inst_t parseSetOrTee(OpBuilder &);
+  /// Blocks and Loops have a similar format and differ only in how their exit
+  /// is handled, which doesn't matter at parsing time, so both are parsed by
+  /// this single function.
+ template <typename OpToCreate>
+ parsed_inst_t parseBlockLikeOp(OpBuilder &);
+
private:
std::optional<Location> currentOpLoc;
ParserHead &parser;
@@ -586,6 +722,29 @@ public:
return success();
}
+ llvm::FailureOr<BlockTypeParseResult> parseBlockType(MLIRContext *ctx) {
+ auto loc = getLocation();
+ auto blockIndicator = peek();
+ if (failed(blockIndicator))
+ return failure();
+ if (*blockIndicator == WasmBinaryEncoding::Type::emptyBlockType) {
+ offset += 1;
+ return {EmptyBlockMarker{}};
+ }
+ if (isValueOneOf(*blockIndicator, valueTypesEncodings))
+ return parseValueType(ctx);
+    /// A block type idx is a 32-bit positive integer encoded as a 33-bit
+    /// signed value.
+ auto typeIdx = parseI64();
+ if (failed(typeIdx))
+ return failure();
+ if (*typeIdx < 0 || *typeIdx > std::numeric_limits<uint32_t>::max())
+ return emitError(loc, "type ID should be representable with an unsigned "
+                            "32-bit integer. Got ")
+ << *typeIdx;
+ return {TypeIdxRecord{static_cast<uint32_t>(*typeIdx)}};
+ }
+
bool end() const { return curHead().empty(); }
ParserHead copy() const { return *this; }
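
As a concrete reading of this encoding (the byte values come from the
WebAssembly binary format and are stated here only for context): 0x40 denotes
the empty block type, a value-type byte such as 0x7F (i32) denotes a
single-result block, and anything else is decoded as a signed LEB128 (s33)
whose non-negative value is an index into the module's function types.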
@@ -701,17 +860,41 @@ inline parsed_inst_t ExpressionParser::parseSpecificInstruction(OpBuilder &) {
void ValueStack::dump() const {
llvm::dbgs() << "================= Wasm ValueStack =======================\n";
llvm::dbgs() << "size: " << size() << "\n";
+ llvm::dbgs() << "nbFrames: " << labelLevel.size() << '\n';
llvm::dbgs() << "<Top>"
<< "\n";
// Stack is pushed to via push_back. Therefore the top of the stack is the
// end of the vector. Iterate in reverse so that the first thing we print
// is the top of the stack.
+ auto indexGetter = [this]() {
+ size_t idx = labelLevel.size();
+ return [this, idx]() mutable -> std::optional<std::pair<size_t, size_t>> {
+ llvm::dbgs() << "IDX: " << idx << '\n';
+ if (idx == 0)
+ return std::nullopt;
+ auto frameId = idx - 1;
+ auto frameLimit = labelLevel[frameId].stackIdx;
+ idx -= 1;
+ return {{frameId, frameLimit}};
+ };
+ };
+ auto getNextFrameIndex = indexGetter();
+ auto nextFrameIdx = getNextFrameIndex();
size_t stackSize = size();
- for (size_t idx = 0; idx < stackSize; idx++) {
+ for (size_t idx = 0; idx < stackSize; ++idx) {
size_t actualIdx = stackSize - 1 - idx;
+ while (nextFrameIdx && (nextFrameIdx->second > actualIdx)) {
+ llvm::dbgs() << " --------------- Frame (" << nextFrameIdx->first
+ << ")\n";
+ nextFrameIdx = getNextFrameIndex();
+ }
llvm::dbgs() << " ";
values[actualIdx].dump();
}
+ while (nextFrameIdx) {
+ llvm::dbgs() << " --------------- Frame (" << nextFrameIdx->first << ")\n";
+ nextFrameIdx = getNextFrameIndex();
+ }
llvm::dbgs() << "<Bottom>"
<< "\n";
llvm::dbgs() << "=========================================================\n";
@@ -726,7 +909,7 @@ parsed_inst_t ValueStack::popOperands(TypeRange operandTypes, Location *opLoc) {
return emitError(*opLoc,
"stack doesn't contain enough values. trying to get ")
<< operandTypes.size() << " operands on a stack containing only "
- << values.size() << " values.";
+ << values.size() << " values";
size_t stackIdxOffset = values.size() - operandTypes.size();
SmallVector<Value> res{};
res.reserve(operandTypes.size());
@@ -735,8 +918,7 @@ parsed_inst_t ValueStack::popOperands(TypeRange operandTypes, Location *opLoc) {
Type stackType = operand.getType();
if (stackType != operandTypes[i])
return emitError(*opLoc, "invalid operand type on stack. expecting ")
- << operandTypes[i] << ", value on stack is of type " << stackType
- << ".";
+ << operandTypes[i] << ", value on stack is of type " << stackType;
LDBG() << " POP: " << operand;
res.push_back(operand);
}
@@ -792,6 +974,151 @@ ExpressionParser::parse(OpBuilder &builder,
}
}
+llvm::FailureOr<FunctionType>
+ExpressionParser::parseBlockFuncType(OpBuilder &builder) {
+ return getFuncTypeFor(builder, parser.parseBlockType(builder.getContext()));
+}
+
+template <typename OpToCreate>
+parsed_inst_t ExpressionParser::parseBlockLikeOp(OpBuilder &builder) {
+ auto opLoc = currentOpLoc;
+ auto funcType = parseBlockFuncType(builder);
+ if (failed(funcType))
+ return failure();
+
+ auto inputTypes = funcType->getInputs();
+ auto inputOps = popOperands(inputTypes);
+ if (failed(inputOps))
+ return failure();
+
+ Block *curBlock = builder.getBlock();
+ Region *curRegion = curBlock->getParent();
+ auto resTypes = funcType->getResults();
+ llvm::SmallVector<Location> locations{};
+ locations.resize(resTypes.size(), *currentOpLoc);
+ auto *successor =
+ builder.createBlock(curRegion, curRegion->end(), resTypes, locations);
+ builder.setInsertionPointToEnd(curBlock);
+ auto blockOp =
+ builder.create<OpToCreate>(*currentOpLoc, *inputOps, successor);
+ auto *blockBody = blockOp.createBlock();
+ if (failed(parseBlockContent(builder, blockBody, resTypes, *opLoc, blockOp)))
+ return failure();
+ builder.setInsertionPointToStart(successor);
+ return {ValueRange{successor->getArguments()}};
+}
+
+template <>
+inline parsed_inst_t
+ExpressionParser::parseSpecificInstruction<WasmBinaryEncoding::OpCode::block>(
+ OpBuilder &builder) {
+ return parseBlockLikeOp<BlockOp>(builder);
+}
+
+template <>
+inline parsed_inst_t
+ExpressionParser::parseSpecificInstruction<WasmBinaryEncoding::OpCode::loop>(
+ OpBuilder &builder) {
+ return parseBlockLikeOp<LoopOp>(builder);
+}
+
+template <>
+inline parsed_inst_t ExpressionParser::parseSpecificInstruction<
+ WasmBinaryEncoding::OpCode::ifOpCode>(OpBuilder &builder) {
+ auto opLoc = currentOpLoc;
+ auto funcType = parseBlockFuncType(builder);
+ if (failed(funcType))
+ return failure();
+
+ LDBG() << "Parsing an if instruction of type " << *funcType;
+ auto inputTypes = funcType->getInputs();
+ auto conditionValue = popOperands(builder.getI32Type());
+ if (failed(conditionValue))
+ return failure();
+ auto inputOps = popOperands(inputTypes);
+ if (failed(inputOps))
+ return failure();
+
+ Block *curBlock = builder.getBlock();
+ Region *curRegion = curBlock->getParent();
+ auto resTypes = funcType->getResults();
+ llvm::SmallVector<Location> locations{};
+ locations.resize(resTypes.size(), *currentOpLoc);
+ auto *successor =
+ builder.createBlock(curRegion, curRegion->end(), resTypes, locations);
+ builder.setInsertionPointToEnd(curBlock);
+ auto ifOp = builder.create<IfOp>(*currentOpLoc, conditionValue->front(),
+ *inputOps, successor);
+ auto *ifEntryBlock = ifOp.createIfBlock();
+ constexpr auto ifElseFilter =
+ ByteSequence<WasmBinaryEncoding::endByte,
+ WasmBinaryEncoding::OpCode::elseOpCode>{};
+ auto parseIfRes = parseBlockContent(builder, ifEntryBlock, resTypes, *opLoc,
+ ifOp, ifElseFilter);
+ if (failed(parseIfRes))
+ return failure();
+ if (*parseIfRes == WasmBinaryEncoding::OpCode::elseOpCode) {
+ LDBG() << " else block is present.";
+ Block *elseEntryBlock = ifOp.createElseBlock();
+ auto parseElseRes =
+ parseBlockContent(builder, elseEntryBlock, resTypes, *opLoc, ifOp);
+ if (failed(parseElseRes))
+ return failure();
+ }
+ builder.setInsertionPointToStart(successor);
+ return {ValueRange{successor->getArguments()}};
+}
+
+template <>
+inline parsed_inst_t ExpressionParser::parseSpecificInstruction<
+ WasmBinaryEncoding::OpCode::branchIf>(OpBuilder &builder) {
+ auto level = parser.parseLiteral<uint32_t>();
+ if (failed(level))
+ return failure();
+ Block *curBlock = builder.getBlock();
+ Region *curRegion = curBlock->getParent();
+ auto sip = builder.saveInsertionPoint();
+ Block *elseBlock = builder.createBlock(curRegion, curRegion->end());
+ auto condition = popOperands(builder.getI32Type());
+ if (failed(condition))
+ return failure();
+ builder.restoreInsertionPoint(sip);
+ auto targetOp =
+ LabelBranchingOpInterface::getTargetOpFromBlock(curBlock, *level);
+ if (failed(targetOp))
+ return failure();
+ auto inputTypes = targetOp->getLabelTarget()->getArgumentTypes();
+ auto branchArgs = popOperands(inputTypes);
+ if (failed(branchArgs))
+ return failure();
+ builder.create<BranchIfOp>(*currentOpLoc, condition->front(),
+ builder.getUI32IntegerAttr(*level), *branchArgs,
+ elseBlock);
+ builder.setInsertionPointToStart(elseBlock);
+ return {*branchArgs};
+}
+
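+// `call`: resolves the callee from its function index, pops the arguments
+// according to the callee's signature, and pushes the call results.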
+template <>
+inline parsed_inst_t
+ExpressionParser::parseSpecificInstruction<WasmBinaryEncoding::OpCode::call>(
+ OpBuilder &builder) {
+ auto loc = *currentOpLoc;
+ auto funcIdx = parser.parseLiteral<uint32_t>();
+ if (failed(funcIdx))
+ return failure();
+ if (*funcIdx >= symbols.funcSymbols.size())
+ return emitError(loc, "Invalid function index: ") << *funcIdx;
+ auto callee = symbols.funcSymbols[*funcIdx];
+ llvm::ArrayRef<Type> inTypes = callee.functionType.getInputs();
+ llvm::ArrayRef<Type> resTypes = callee.functionType.getResults();
+ parsed_inst_t inOperands = popOperands(inTypes);
+ if (failed(inOperands))
+ return failure();
+ auto callOp =
+ builder.create<FuncCallOp>(loc, resTypes, callee.symbol, *inOperands);
+ return {callOp.getResults()};
+}
+
template <>
inline parsed_inst_t ExpressionParser::parseSpecificInstruction<
WasmBinaryEncoding::OpCode::localGet>(OpBuilder &builder) {
@@ -834,7 +1161,7 @@ parsed_inst_t ExpressionParser::parseSetOrTee(OpBuilder &builder) {
if (valueStack.empty())
return emitError(
*currentOpLoc,
- "invalid stack access, trying to access a value on an empty stack.");
+ "invalid stack access, trying to access a value on an empty stack");
parsed_inst_t poppedOp = popOperands(locals[*id].getType().getElementType());
if (failed(poppedOp))
@@ -1000,11 +1327,23 @@ inline parsed_inst_t ExpressionParser::buildNumericOp(
BUILD_NUMERIC_BINOP_FP(CopySignOp, copysign)
BUILD_NUMERIC_BINOP_FP(DivOp, div)
+BUILD_NUMERIC_BINOP_FP(GeOp, ge)
+BUILD_NUMERIC_BINOP_FP(GtOp, gt)
+BUILD_NUMERIC_BINOP_FP(LeOp, le)
+BUILD_NUMERIC_BINOP_FP(LtOp, lt)
BUILD_NUMERIC_BINOP_FP(MaxOp, max)
BUILD_NUMERIC_BINOP_FP(MinOp, min)
BUILD_NUMERIC_BINOP_INT(AndOp, and)
BUILD_NUMERIC_BINOP_INT(DivSIOp, divS)
BUILD_NUMERIC_BINOP_INT(DivUIOp, divU)
+BUILD_NUMERIC_BINOP_INT(GeSIOp, geS)
+BUILD_NUMERIC_BINOP_INT(GeUIOp, geU)
+BUILD_NUMERIC_BINOP_INT(GtSIOp, gtS)
+BUILD_NUMERIC_BINOP_INT(GtUIOp, gtU)
+BUILD_NUMERIC_BINOP_INT(LeSIOp, leS)
+BUILD_NUMERIC_BINOP_INT(LeUIOp, leU)
+BUILD_NUMERIC_BINOP_INT(LtSIOp, ltS)
+BUILD_NUMERIC_BINOP_INT(LtUIOp, ltU)
BUILD_NUMERIC_BINOP_INT(OrOp, or)
BUILD_NUMERIC_BINOP_INT(RemSIOp, remS)
BUILD_NUMERIC_BINOP_INT(RemUIOp, remU)
@@ -1015,7 +1354,9 @@ BUILD_NUMERIC_BINOP_INT(ShRSOp, shrS)
BUILD_NUMERIC_BINOP_INT(ShRUOp, shrU)
BUILD_NUMERIC_BINOP_INT(XOrOp, xor)
BUILD_NUMERIC_BINOP_INTFP(AddOp, add)
+BUILD_NUMERIC_BINOP_INTFP(EqOp, eq)
BUILD_NUMERIC_BINOP_INTFP(MulOp, mul)
+BUILD_NUMERIC_BINOP_INTFP(NeOp, ne)
BUILD_NUMERIC_BINOP_INTFP(SubOp, sub)
BUILD_NUMERIC_UNARY_OP_FP(AbsOp, abs)
BUILD_NUMERIC_UNARY_OP_FP(CeilOp, ceil)
@@ -1025,6 +1366,7 @@ BUILD_NUMERIC_UNARY_OP_FP(SqrtOp, sqrt)
BUILD_NUMERIC_UNARY_OP_FP(TruncOp, trunc)
BUILD_NUMERIC_UNARY_OP_INT(ClzOp, clz)
BUILD_NUMERIC_UNARY_OP_INT(CtzOp, ctz)
+BUILD_NUMERIC_UNARY_OP_INT(EqzOp, eqz)
BUILD_NUMERIC_UNARY_OP_INT(PopCntOp, popcnt)
// Don't need these anymore so let's undef them.
@@ -1036,6 +1378,105 @@ BUILD_NUMERIC_UNARY_OP_INT(PopCntOp, popcnt)
#undef BUILD_NUMERIC_OP
#undef BUILD_NUMERIC_CAST_OP
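+// Helper shared by the value-conversion instructions: pops one operand of the
+// source literal type and builds a single-result op producing the destination
+// type, forwarding any extra arguments to the op builder.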
+template <typename opType, typename inputType, typename outputType,
+ typename... extraArgsT>
+inline parsed_inst_t ExpressionParser::buildConvertOp(OpBuilder &builder,
+ extraArgsT... extraArgs) {
+ static_assert(std::is_arithmetic_v<inputType>,
+ "inputType should be an arithmetic type");
+ static_assert(std::is_arithmetic_v<outputType>,
+ "outputType should be an arithmetic type");
+ auto inType = buildLiteralType<inputType>(builder);
+ auto outType = buildLiteralType<outputType>(builder);
+ auto operand = popOperands(inType);
+ if (failed(operand))
+ return failure();
+ auto op = builder.create<opType>(*currentOpLoc, outType, operand->front(),
+ extraArgs...);
+ LDBG() << "Built operation: " << op;
+ return {{op.getResult()}};
+}
+
+template <>
+inline parsed_inst_t ExpressionParser::parseSpecificInstruction<
+ WasmBinaryEncoding::OpCode::demoteF64ToF32>(OpBuilder &builder) {
+ return buildConvertOp<DemoteOp, double, float>(builder);
+}
+
+template <>
+inline parsed_inst_t
+ExpressionParser::parseSpecificInstruction<WasmBinaryEncoding::OpCode::wrap>(
+ OpBuilder &builder) {
+ return buildConvertOp<WrapOp, int64_t, int32_t>(builder);
+}
+
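+// Expands to a parseSpecificInstruction specialization that maps one Wasm
+// conversion opcode onto buildConvertOp with the given source/destination
+// types and target op.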
+#define BUILD_CONVERSION_OP(IN_T, OUT_T, SOURCE_OP, TARGET_OP) \
+ template <> \
+ inline parsed_inst_t ExpressionParser::parseSpecificInstruction< \
+ WasmBinaryEncoding::OpCode::SOURCE_OP>(OpBuilder & builder) { \
+ return buildConvertOp<TARGET_OP, IN_T, OUT_T>(builder); \
+ }
+
+#define BUILD_CONVERT_OP_FOR(DEST_T, WIDTH) \
+ BUILD_CONVERSION_OP(uint32_t, DEST_T, convertUI32F##WIDTH, ConvertUOp) \
+ BUILD_CONVERSION_OP(int32_t, DEST_T, convertSI32F##WIDTH, ConvertSOp) \
+ BUILD_CONVERSION_OP(uint64_t, DEST_T, convertUI64F##WIDTH, ConvertUOp) \
+ BUILD_CONVERSION_OP(int64_t, DEST_T, convertSI64F##WIDTH, ConvertSOp)
+
+BUILD_CONVERT_OP_FOR(float, 32)
+BUILD_CONVERT_OP_FOR(double, 64)
+
+#undef BUILD_CONVERT_OP_FOR
+
+BUILD_CONVERSION_OP(int32_t, int64_t, extendS, ExtendSI32Op)
+BUILD_CONVERSION_OP(int32_t, int64_t, extendU, ExtendUI32Op)
+
+#undef BUILD_CONVERSION_OP
+
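+// Parsers for the in-place sign-extension instructions (extend8_s,
+// extend16_s, extend32_s): the low EXTRACT_WIDTH bits are sign-extended and
+// the input and output types are the same IT_WIDTH-bit integer.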
+#define BUILD_SLICE_EXTEND_PARSER(IT_WIDTH, EXTRACT_WIDTH) \
+ template <> \
+ inline parsed_inst_t ExpressionParser::parseSpecificInstruction< \
+ WasmBinaryEncoding::OpCode::extendI##IT_WIDTH##EXTRACT_WIDTH##S>( \
+ OpBuilder & builder) { \
+ using inout_t = int##IT_WIDTH##_t; \
+ auto attr = builder.getUI32IntegerAttr(EXTRACT_WIDTH); \
+ return buildConvertOp<ExtendLowBitsSOp, inout_t, inout_t>(builder, attr); \
+ }
+
+BUILD_SLICE_EXTEND_PARSER(32, 8)
+BUILD_SLICE_EXTEND_PARSER(32, 16)
+BUILD_SLICE_EXTEND_PARSER(64, 8)
+BUILD_SLICE_EXTEND_PARSER(64, 16)
+BUILD_SLICE_EXTEND_PARSER(64, 32)
+
+#undef BUILD_SLICE_EXTEND_PARSER
+
+template <>
+inline parsed_inst_t ExpressionParser::parseSpecificInstruction<
+ WasmBinaryEncoding::OpCode::promoteF32ToF64>(OpBuilder &builder) {
+ return buildConvertOp<PromoteOp, float, double>(builder);
+}
+
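+// Bit-pattern reinterpretations between same-width integer and float types
+// (f32 <-> i32 and f64 <-> i64), one specialization per direction.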
+#define BUILD_REINTERPRET_PARSER(WIDTH, FP_TYPE) \
+ template <> \
+ inline parsed_inst_t ExpressionParser::parseSpecificInstruction< \
+ WasmBinaryEncoding::OpCode::reinterpretF##WIDTH##AsI##WIDTH>(OpBuilder & \
+ builder) { \
+ return buildConvertOp<ReinterpretOp, FP_TYPE, int##WIDTH##_t>(builder); \
+ } \
+ \
+ template <> \
+ inline parsed_inst_t ExpressionParser::parseSpecificInstruction< \
+ WasmBinaryEncoding::OpCode::reinterpretI##WIDTH##AsF##WIDTH>(OpBuilder & \
+ builder) { \
+ return buildConvertOp<ReinterpretOp, int##WIDTH##_t, FP_TYPE>(builder); \
+ }
+
+BUILD_REINTERPRET_PARSER(32, float)
+BUILD_REINTERPRET_PARSER(64, double)
+
+#undef BUILD_REINTERPRET_PARSER
+
class WasmBinaryParser {
private:
struct SectionRegistry {
@@ -1153,7 +1594,7 @@ private:
if (tid.id >= symbols.moduleFuncTypes.size())
return emitError(loc, "invalid type id: ")
<< tid.id << ". Only " << symbols.moduleFuncTypes.size()
- << " type registration.";
+ << " type registrations";
FunctionType type = symbols.moduleFuncTypes[tid.id];
std::string symbol = symbols.getNewFuncSymbolName();
auto funcOp = FuncImportOp::create(builder, loc, symbol, moduleName,
@@ -1221,7 +1662,7 @@ public:
FileLineColLoc magicLoc = parser.getLocation();
FailureOr<StringRef> magic = parser.consumeNBytes(wasmHeader.size());
if (failed(magic) || magic->compare(wasmHeader)) {
- emitError(magicLoc, "source file does not contain valid Wasm header.");
+ emitError(magicLoc, "source file does not contain valid Wasm header");
return;
}
auto const expectedVersionString = StringRef{"\1\0\0\0", 4};
@@ -1391,7 +1832,7 @@ WasmBinaryParser::parseSectionItem<WasmSectionType::EXPORT>(ParserHead &ph,
return failure();
Operation *op = SymbolTable::lookupSymbolIn(mOp, *currentSymbol);
- SymbolTable::setSymbolVisibility(op, SymbolTable::Visibility::Public);
+ op->setAttr("exported", UnitAttr::get(op->getContext()));
StringAttr symName = SymbolTable::getSymbolName(op);
return SymbolTable{mOp}.rename(symName, *exportName);
}