diff options
author | Matthias Springer <me@m-sp.org> | 2025-08-13 09:54:30 +0000 |
---|---|---|
committer | Matthias Springer <me@m-sp.org> | 2025-08-13 12:20:41 +0000 |
commit | e021424bdf55a3d3d83b71713a3eb2e97d8078f5 (patch) | |
tree | 89e28e3c0a270f83ac5e93b3c52a55b82229e02f | |
parent | 240c454c4d04586967531c0a999de5547d776f47 (diff) | |
download | llvm-users/matthias-springer/scf_for_unsigned.zip llvm-users/matthias-springer/scf_for_unsigned.tar.gz llvm-users/matthias-springer/scf_for_unsigned.tar.bz2 |
[mlir][SCF] `scf.for`: Add support for unsigned integer comparison (users/matthias-springer/scf_for_unsigned)
19 files changed, 133 insertions, 41 deletions
diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td index 0c1c15b..88df541 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -169,9 +169,13 @@ def ForOp : SCF_Op<"for", region capturing the loop body. The induction variable is represented as an argument of this region. This SSA value is a signless integer or index. The step is a value of same type but required to be positive, the lower and - upper bounds can be also negative or zero. The lower and upper bounds specify - a half-open range: the iteration is executed iff the signed comparison of induction - variable value is less than the upper bound and bigger or equal to the lower bound. + upper bounds can be also negative or zero. The lower and upper bounds + specify a half-open range: the iteration is executed iff the comparison of + induction variable value is less than the upper bound and bigger or equal + to the lower bound. + + By default, the integer comparison is signed. If the `unsignedCmp` unit + attribute is specified, the integer comparison is unsigned. The body region must contain exactly one block that terminates with `scf.yield`. Calling ForOp::build will create such a region and insert @@ -184,8 +188,8 @@ def ForOp : SCF_Op<"for", ... // body } ... - // Integer case. - scf.for %iv_32 = %lb_32 to %ub_32 step %step_32 : i32 { + // Unsigned integer case. + scf.for unsigned %iv_32 = %lb_32 to %ub_32 step %step_32 : i32 { ... 
// body } ``` @@ -258,7 +262,8 @@ def ForOp : SCF_Op<"for", let arguments = (ins AnySignlessIntegerOrIndex:$lowerBound, AnySignlessIntegerOrIndex:$upperBound, AnySignlessIntegerOrIndex:$step, - Variadic<AnyType>:$initArgs); + Variadic<AnyType>:$initArgs, + UnitAttr:$unsignedCmp); let results = (outs Variadic<AnyType>:$results); let regions = (region SizedRegion<1>:$region); @@ -266,7 +271,8 @@ def ForOp : SCF_Op<"for", let builders = [OpBuilder<(ins "Value":$lowerBound, "Value":$upperBound, "Value":$step, CArg<"ValueRange", "{}">:$initArgs, CArg<"function_ref<void(OpBuilder &, Location, Value, ValueRange)>", - "nullptr">)>]; + "nullptr">, + CArg<"bool", "false">:$unsignedCmp)>]; let extraClassDeclaration = [{ using BodyBuilderFn = diff --git a/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp b/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp index ba448e4..37cfc9f 100644 --- a/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp +++ b/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp @@ -382,8 +382,11 @@ LogicalResult ForLowering::matchAndRewrite(ForOp forOp, // With the body block done, we can fill in the condition block. rewriter.setInsertionPointToEnd(conditionBlock); - auto comparison = arith::CmpIOp::create( - rewriter, loc, arith::CmpIPredicate::slt, iv, upperBound); + arith::CmpIPredicate predicate = forOp.getUnsignedCmp() + ? 
arith::CmpIPredicate::ult + : arith::CmpIPredicate::slt; + auto comparison = + arith::CmpIOp::create(rewriter, loc, predicate, iv, upperBound); cf::CondBranchOp::create(rewriter, loc, comparison, firstBodyBlock, ArrayRef<Value>(), endBlock, ArrayRef<Value>()); diff --git a/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp b/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp index 84cbd86..1f239aa 100644 --- a/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp +++ b/mlir/lib/Conversion/SCFToEmitC/SCFToEmitC.cpp @@ -154,6 +154,10 @@ ForLowering::matchAndRewrite(ForOp forOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { Location loc = forOp.getLoc(); + if (forOp.getUnsignedCmp()) + return rewriter.notifyMatchFailure(forOp, + "unsigned loops are not supported"); + // Create an emitc::variable op for each result. These variables will be // assigned to by emitc::assign ops within the loop body. SmallVector<Value> resultVariables; diff --git a/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp b/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp index dc92367..55ed31e 100644 --- a/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp +++ b/mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp @@ -178,8 +178,14 @@ struct ForOpConversion final : SCFToSPIRVPattern<scf::ForOp> { // Generate the rest of the loop header. 
rewriter.setInsertionPointToEnd(header); auto *mergeBlock = loopOp.getMergeBlock(); - auto cmpOp = spirv::SLessThanOp::create(rewriter, loc, rewriter.getI1Type(), - newIndVar, adaptor.getUpperBound()); + Value cmpOp; + if (forOp.getUnsignedCmp()) { + cmpOp = spirv::ULessThanOp::create(rewriter, loc, rewriter.getI1Type(), + newIndVar, adaptor.getUpperBound()); + } else { + cmpOp = spirv::SLessThanOp::create(rewriter, loc, rewriter.getI1Type(), + newIndVar, adaptor.getUpperBound()); + } spirv::BranchConditionalOp::create(rewriter, loc, cmpOp, body, ArrayRef<Value>(), mergeBlock, diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp index fd530f2..9436f1c 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp @@ -594,7 +594,8 @@ static FailureOr<PackingResult> buildPackingLoopNestImpl( auto clonedForOp = scf::ForOp::create( rewriter, loc, bvm.lookupOrDefault(forOp.getLowerBound()), bvm.lookupOrDefault(forOp.getUpperBound()), - bvm.lookupOrDefault(forOp.getStep()), hoistedPackedTensor); + bvm.lookupOrDefault(forOp.getStep()), hoistedPackedTensor, + /*bodyBuilder=*/nullptr, forOp.getUnsignedCmp()); // Map the induction var, region args and results to the `clonedForOp`. 
bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar()); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp index 58986a6..922b7d6 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp @@ -55,7 +55,8 @@ static scf::ForOp replaceWithDifferentYield(RewriterBase &rewriter, scf::ForOp newLoop = scf::ForOp::create( rewriter, loop.getLoc(), loop.getLowerBound(), loop.getUpperBound(), - loop.getStep(), inits, [](OpBuilder &, Location, Value, ValueRange) {}); + loop.getStep(), inits, [](OpBuilder &, Location, Value, ValueRange) {}, + loop.getUnsignedCmp()); // Generate the new yield with the replaced operand. auto yieldOp = cast<scf::YieldOp>(loop.getBody()->getTerminator()); diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 0262a1b..89731de 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -318,9 +318,12 @@ void ConditionOp::getSuccessorRegions( void ForOp::build(OpBuilder &builder, OperationState &result, Value lb, Value ub, Value step, ValueRange initArgs, - BodyBuilderFn bodyBuilder) { + BodyBuilderFn bodyBuilder, bool unsignedCmp) { OpBuilder::InsertionGuard guard(builder); + if (unsignedCmp) + result.addAttribute(getUnsignedCmpAttrName(result.name), + builder.getUnitAttr()); result.addOperands({lb, ub, step}); result.addOperands(initArgs); for (Value v : initArgs) @@ -450,6 +453,9 @@ static void printInitializationList(OpAsmPrinter &p, } void ForOp::print(OpAsmPrinter &p) { + if (getUnsignedCmp()) + p << " unsigned"; + p << " " << getInductionVar() << " = " << getLowerBound() << " to " << getUpperBound() << " step " << getStep(); @@ -462,7 +468,8 @@ void ForOp::print(OpAsmPrinter &p) { p.printRegion(getRegion(), /*printEntryBlockArgs=*/false, /*printBlockTerminators=*/!getInitArgs().empty()); - p.printOptionalAttrDict((*this)->getAttrs()); + 
p.printOptionalAttrDict((*this)->getAttrs(), + /*elidedAttrs=*/getUnsignedCmpAttrName().strref()); } ParseResult ForOp::parse(OpAsmParser &parser, OperationState &result) { @@ -472,6 +479,10 @@ ParseResult ForOp::parse(OpAsmParser &parser, OperationState &result) { OpAsmParser::Argument inductionVariable; OpAsmParser::UnresolvedOperand lb, ub, step; + if (succeeded(parser.parseOptionalKeyword("unsigned"))) + result.addAttribute(getUnsignedCmpAttrName(result.name), + builder.getUnitAttr()); + // Parse the induction variable followed by '='. if (parser.parseOperand(inductionVariable.ssaName) || parser.parseEqual() || // Parse loop bounds. @@ -562,7 +573,7 @@ ForOp::replaceWithAdditionalYields(RewriterBase &rewriter, inits.append(newInitOperands.begin(), newInitOperands.end()); scf::ForOp newLoop = scf::ForOp::create( rewriter, getLoc(), getLowerBound(), getUpperBound(), getStep(), inits, - [](OpBuilder &, Location, Value, ValueRange) {}); + [](OpBuilder &, Location, Value, ValueRange) {}, getUnsignedCmp()); newLoop->setAttrs(getPrunedAttributeList(getOperation(), {})); // Generate the new yield values and append them to the scf.yield operation. @@ -806,7 +817,8 @@ mlir::scf::replaceAndCastForOpIterArg(RewriterBase &rewriter, scf::ForOp forOp, // 2. Create the new forOp shell. 
scf::ForOp newForOp = scf::ForOp::create( rewriter, forOp.getLoc(), forOp.getLowerBound(), forOp.getUpperBound(), - forOp.getStep(), newIterOperands); + forOp.getStep(), newIterOperands, /*bodyBuilder=*/nullptr, + forOp.getUnsignedCmp()); newForOp->setAttrs(forOp->getAttrs()); Block &newBlock = newForOp.getRegion().front(); SmallVector<Value, 4> newBlockTransferArgs(newBlock.getArguments().begin(), @@ -931,7 +943,8 @@ struct ForOpIterArgsFolder : public OpRewritePattern<scf::ForOp> { scf::ForOp newForOp = scf::ForOp::create(rewriter, forOp.getLoc(), forOp.getLowerBound(), - forOp.getUpperBound(), forOp.getStep(), newIterArgs); + forOp.getUpperBound(), forOp.getStep(), newIterArgs, + /*bodyBuilder=*/nullptr, forOp.getUnsignedCmp()); newForOp->setAttrs(forOp->getAttrs()); Block &newBlock = newForOp.getRegion().front(); @@ -989,12 +1002,12 @@ struct ForOpIterArgsFolder : public OpRewritePattern<scf::ForOp> { /// Util function that tries to compute a constant diff between u and l. /// Returns std::nullopt when the difference between two AffineValueMap is /// dynamic. 
-static std::optional<int64_t> computeConstDiff(Value l, Value u) { +static std::optional<APInt> computeConstDiff(Value l, Value u) { IntegerAttr clb, cub; if (matchPattern(l, m_Constant(&clb)) && matchPattern(u, m_Constant(&cub))) { llvm::APInt lbValue = clb.getValue(); llvm::APInt ubValue = cub.getValue(); - return (ubValue - lbValue).getSExtValue(); + return ubValue - lbValue; } // Else a simple pattern match for x + c or c + x @@ -1003,7 +1016,7 @@ static std::optional<int64_t> computeConstDiff(Value l, Value u) { u, m_Op<arith::AddIOp>(matchers::m_Val(l), m_ConstantInt(&diff))) || matchPattern( u, m_Op<arith::AddIOp>(m_ConstantInt(&diff), matchers::m_Val(l)))) - return diff.getSExtValue(); + return diff; return std::nullopt; } @@ -1022,13 +1035,15 @@ struct SimplifyTrivialLoops : public OpRewritePattern<ForOp> { return success(); } - std::optional<int64_t> diff = + std::optional<APInt> diff = computeConstDiff(op.getLowerBound(), op.getUpperBound()); if (!diff) return failure(); // If the loop is known to have 0 iterations, remove it. - if (*diff <= 0) { + bool zeroOrLessIterations = + diff->isZero() || (!op.getUnsignedCmp() && diff->isNegative()); + if (zeroOrLessIterations) { rewriter.replaceOp(op, op.getInitArgs()); return success(); } @@ -3384,9 +3399,8 @@ ParseResult scf::WhileOp::parse(OpAsmParser &parser, OperationState &result) { if (functionType.getNumInputs() != operands.size()) { return parser.emitError(typeLoc) - << "expected as many input types as operands " - << "(expected " << operands.size() << " got " - << functionType.getNumInputs() << ")"; + << "expected as many input types as operands " << "(expected " + << operands.size() << " got " << functionType.getNumInputs() << ")"; } // Resolve input operands. 
diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp index f8799c5..fb179e6 100644 --- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp @@ -769,7 +769,8 @@ struct ForOpInterface // Construct a new scf.for op with memref instead of tensor values. auto newForOp = scf::ForOp::create( rewriter, forOp.getLoc(), forOp.getLowerBound(), forOp.getUpperBound(), - forOp.getStep(), castedInitArgs); + forOp.getStep(), castedInitArgs, /*bodyBuilder=*/nullptr, + forOp.getUnsignedCmp()); newForOp->setAttrs(forOp->getAttrs()); Block *loopBody = newForOp.getBody(); diff --git a/mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp b/mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp index bee7780..ae52af5 100644 --- a/mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp @@ -58,9 +58,12 @@ struct ForLoopLoweringPattern : public OpRewritePattern<ForOp> { auto *beforeBlock = rewriter.createBlock( &whileOp.getBefore(), whileOp.getBefore().begin(), lcvTypes, lcvLocs); rewriter.setInsertionPointToStart(whileOp.getBeforeBody()); - auto cmpOp = arith::CmpIOp::create( - rewriter, whileOp.getLoc(), arith::CmpIPredicate::slt, - beforeBlock->getArgument(0), forOp.getUpperBound()); + arith::CmpIPredicate predicate = forOp.getUnsignedCmp() + ? 
arith::CmpIPredicate::ult + : arith::CmpIPredicate::slt; + auto cmpOp = arith::CmpIOp::create(rewriter, whileOp.getLoc(), predicate, + beforeBlock->getArgument(0), + forOp.getUpperBound()); scf::ConditionOp::create(rewriter, whileOp.getLoc(), cmpOp.getResult(), beforeBlock->getArguments()); diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp index 1130538..7e7fba4 100644 --- a/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp @@ -791,6 +791,11 @@ FailureOr<ForOp> mlir::scf::pipelineForLoop(RewriterBase &rewriter, ForOp forOp, bool *modifiedIR) { if (modifiedIR) *modifiedIR = false; + + // TODO: Add support for unsigned loops. + if (forOp.getUnsignedCmp()) + return failure(); + LoopPipelinerInternal pipeliner; if (!pipeliner.initializeLoopInfo(forOp, options)) return failure(); diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp index 4752c08..f1203b2 100644 --- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp @@ -256,6 +256,10 @@ struct ForLoopPeelingPattern : public OpRewritePattern<ForOp> { LogicalResult matchAndRewrite(ForOp forOp, PatternRewriter &rewriter) const override { + if (forOp.getUnsignedCmp()) + return rewriter.notifyMatchFailure(forOp, + "unsigned loops are not supported"); + // Do not peel already peeled loops. 
if (forOp->hasAttr(kPeeledLoopLabel)) return failure(); diff --git a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp index 1b07b77..3b75970 100644 --- a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp @@ -116,7 +116,8 @@ public: llvm::getSingleElement(adaptor.getLowerBound()), llvm::getSingleElement(adaptor.getUpperBound()), llvm::getSingleElement(adaptor.getStep()), - flattenValues(adaptor.getInitArgs())); + flattenValues(adaptor.getInitArgs()), + /*bodyBuilder=*/nullptr, op.getUnsignedCmp()); // Reserve whatever attributes in the original op. newOp->setAttrs(op->getAttrs()); diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp index c0e47ee..250c413 100644 --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -797,7 +797,8 @@ FailureOr<LoopLikeOpInterface> yieldTiledValuesAndReplaceLoop<scf::ForOp>( inits.append(newInitOperands.begin(), newInitOperands.end()); auto newLoop = scf::ForOp::create( rewriter, loc, loopOp.getLowerBound(), loopOp.getUpperBound(), - loopOp.getStep(), inits, [](OpBuilder &, Location, Value, ValueRange) {}); + loopOp.getStep(), inits, [](OpBuilder &, Location, Value, ValueRange) {}, + loopOp.getUnsignedCmp()); // Move the loop body to the new op. Block *loopBody = loopOp.getBody(); @@ -935,7 +936,8 @@ static LogicalResult addInitOperandsToLoopNest( auto newLoop = scf::ForOp::create( rewriter, forLoop.getLoc(), forLoop.getLowerBound(), forLoop.getUpperBound(), forLoop.getStep(), newInits, - [&](OpBuilder &b, Location loc, Value iv, ValueRange iterArgs) {}); + [&](OpBuilder &b, Location loc, Value iv, ValueRange iterArgs) {}, + forLoop.getUnsignedCmp()); // Merge the body of the new loop with the body of the old loops. 
SmallVector<Value> sourceBlockArgs; diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index 5731795..4910258 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -1233,6 +1233,7 @@ static void getPerfectlyNestedLoopsImpl( static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef<scf::ForOp> targets) { + assert(!forOp.getUnsignedCmp() && "unsigned loops are not supported"); auto originalStep = forOp.getStep(); auto iv = forOp.getInductionVar(); @@ -1241,6 +1242,8 @@ static Loops stripmineSink(scf::ForOp forOp, Value factor, Loops innerLoops; for (auto t : targets) { + assert(!t.getUnsignedCmp() && "unsigned loops are not supported"); + // Save information for splicing ops out of t when done auto begin = t.getBody()->begin(); auto nOps = t.getBody()->getOperations().size(); @@ -1415,6 +1418,8 @@ scf::ForallOp mlir::fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForOp mlir::fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source, RewriterBase &rewriter) { + assert(source.getUnsignedCmp() == target.getUnsignedCmp() && + "incompatible signedness"); unsigned numTargetOuts = target.getNumResults(); unsigned numSourceOuts = source.getNumResults(); @@ -1428,7 +1433,8 @@ scf::ForOp mlir::fuseIndependentSiblingForLoops(scf::ForOp target, rewriter.setInsertionPointAfter(source); scf::ForOp fusedLoop = scf::ForOp::create( rewriter, source.getLoc(), source.getLowerBound(), source.getUpperBound(), - source.getStep(), fusedInitArgs); + source.getStep(), fusedInitArgs, /*bodyBuilder=*/nullptr, + source.getUnsignedCmp()); // Map original induction variables and operands to those of the fused loop. 
IRMapping mapping; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp index 4464450..febec6d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp @@ -533,8 +533,10 @@ static bool vectorizeStmt(PatternRewriter &rewriter, scf::ForOp forOp, VL vl, VectorType vtp = vectorType(vl, init.getType()); Value vinit = genVectorReducInit(rewriter, loc, yield->getOperand(0), forOp.getRegionIterArg(0), init, vtp); - forOpNew = scf::ForOp::create(rewriter, loc, forOp.getLowerBound(), - forOp.getUpperBound(), step, vinit); + forOpNew = + scf::ForOp::create(rewriter, loc, forOp.getLowerBound(), + forOp.getUpperBound(), step, vinit, + /*bodyBuilder=*/nullptr, forOp.getUnsignedCmp()); forOpNew->setAttr( LoopEmitter::getLoopEmitterLoopAttrName(), forOp->getAttr(LoopEmitter::getLoopEmitterLoopAttrName())); @@ -605,8 +607,8 @@ public: ForOpRewriter(MLIRContext *context, unsigned vectorLength, bool enableVLAVectorization, bool enableSIMDIndex32) - : OpRewritePattern(context), vl{vectorLength, enableVLAVectorization, - enableSIMDIndex32} {} + : OpRewritePattern(context), + vl{vectorLength, enableVLAVectorization, enableSIMDIndex32} {} LogicalResult matchAndRewrite(scf::ForOp op, PatternRewriter &rewriter) const override { diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp index bb0f339..be0d28a 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp @@ -1826,7 +1826,8 @@ struct WarpOpScfForOp : public WarpDistributionPattern { rewriter.setInsertionPointAfter(newWarpOp); auto newForOp = scf::ForOp::create( rewriter, forOp.getLoc(), forOp.getLowerBound(), forOp.getUpperBound(), - forOp.getStep(), newForOpOperands); + forOp.getStep(), newForOpOperands, 
/*bodyBuilder=*/nullptr, + forOp.getUnsignedCmp()); // Next, we insert a new `WarpOp` (called inner `WarpOp`) inside the // newly created `ForOp`. This `WarpOp` will contain all ops that were // contained within the original `ForOp` body. diff --git a/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir b/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir index ef0fa08..483c7b3 100644 --- a/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir +++ b/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir @@ -18,6 +18,24 @@ func.func @simple_std_for_loop(%arg0 : index, %arg1 : index, %arg2 : index) { return } +// CHECK-LABEL: func @unsigned_loop(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { +// CHECK-NEXT: cf.br ^bb1(%{{.*}} : index) +// CHECK-NEXT: ^bb1(%{{.*}}: index): // 2 preds: ^bb0, ^bb2 +// CHECK-NEXT: %{{.*}} = arith.cmpi ult, %{{.*}}, %{{.*}} : index +// CHECK-NEXT: cf.cond_br %{{.*}}, ^bb2, ^bb3 +// CHECK-NEXT: ^bb2: // pred: ^bb1 +// CHECK-NEXT: %{{.*}} = arith.constant 1 : index +// CHECK-NEXT: %[[iv:.*]] = arith.addi %{{.*}}, %{{.*}} : index +// CHECK-NEXT: cf.br ^bb1(%[[iv]] : index) +// CHECK-NEXT: ^bb3: // pred: ^bb1 +// CHECK-NEXT: return +func.func @unsigned_loop(%arg0 : index, %arg1 : index, %arg2 : index) { + scf.for unsigned %i0 = %arg0 to %arg1 step %arg2 { + %c1 = arith.constant 1 : index + } + return +} + // CHECK-LABEL: func @simple_std_2_for_loops(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { // CHECK-NEXT: cf.br ^bb1(%{{.*}} : index) // CHECK-NEXT: ^bb1(%[[ub0:.*]]: index): // 2 preds: ^bb0, ^bb5 diff --git a/mlir/test/Conversion/SCFToSPIRV/for.mlir b/mlir/test/Conversion/SCFToSPIRV/for.mlir index 81661ec..9c55216 100644 --- a/mlir/test/Conversion/SCFToSPIRV/for.mlir +++ b/mlir/test/Conversion/SCFToSPIRV/for.mlir @@ -5,6 +5,7 @@ module attributes { #spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>> } { +// CHECK-LABEL: @loop_kernel func.func @loop_kernel(%arg2 : 
memref<10xf32, #spirv.storage_class<StorageBuffer>>, %arg3 : memref<10xf32, #spirv.storage_class<StorageBuffer>>) { // CHECK: %[[LB:.*]] = spirv.Constant 4 : i32 %lb = arith.constant 4 : index @@ -34,6 +35,19 @@ func.func @loop_kernel(%arg2 : memref<10xf32, #spirv.storage_class<StorageBuffer return } +// CHECK-LABEL: @unsigned_loop +// CHECK: spirv.ULessThan +func.func @unsigned_loop(%arg2 : memref<10xf32, #spirv.storage_class<StorageBuffer>>, %arg3 : memref<10xf32, #spirv.storage_class<StorageBuffer>>) { + %lb = arith.constant 4 : index + %ub = arith.constant 42 : index + %step = arith.constant 2 : index + scf.for unsigned %arg4 = %lb to %ub step %step { + %1 = memref.load %arg2[%arg4] : memref<10xf32, #spirv.storage_class<StorageBuffer>> + memref.store %1, %arg3[%arg4] : memref<10xf32, #spirv.storage_class<StorageBuffer>> + } + return +} + // CHECK-LABEL: @loop_yield func.func @loop_yield(%arg2 : memref<10xf32, #spirv.storage_class<StorageBuffer>>, %arg3 : memref<10xf32, #spirv.storage_class<StorageBuffer>>) { // CHECK: %[[LB:.*]] = spirv.Constant 4 : i32 diff --git a/mlir/test/Dialect/SCF/ops.mlir b/mlir/test/Dialect/SCF/ops.mlir index 7f457ef..5930a1d 100644 --- a/mlir/test/Dialect/SCF/ops.mlir +++ b/mlir/test/Dialect/SCF/ops.mlir @@ -28,14 +28,14 @@ func.func @std_for(%arg0 : index, %arg1 : index, %arg2 : index) { func.func @std_for_i32(%arg0 : i32, %arg1 : i32, %arg2 : i32) { scf.for %i0 = %arg0 to %arg1 step %arg2 : i32 { - scf.for %i1 = %arg0 to %arg1 step %arg2 : i32 { + scf.for unsigned %i1 = %arg0 to %arg1 step %arg2 : i32 { } } return } // CHECK-LABEL: func @std_for_i32( // CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} : i32 { -// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} : i32 { +// CHECK-NEXT: scf.for unsigned %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} : i32 { func.func @scf_for_i64_iter(%arg1: i64, %arg2: i64) { %c1_i64 = arith.constant 1 : i64 |