Diffstat (limited to 'mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp')
-rw-r--r--  mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp  74
1 file changed, 36 insertions(+), 38 deletions(-)
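
The hunks below mechanically switch op construction from the builder-centric builder.create<OpTy>(loc, ...) form to the static OpTy::create(builder, loc, ...) form; the operands and semantics of each call are unchanged. A minimal sketch of the pattern follows (not part of the patch; the helper name emitConstantOne and its surrounding setup are illustrative assumptions):

// Sketch only, not part of this diff: shows the old and new spellings of
// op creation that this patch converts between.
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/Builders.h"

static mlir::Value emitConstantOne(mlir::OpBuilder &builder,
                                   mlir::Location loc) {
  // Old form, removed throughout this patch:
  //   return builder.create<mlir::arith::ConstantIndexOp>(loc, 1);
  // New form, added throughout this patch:
  return mlir::arith::ConstantIndexOp::create(builder, loc, 1);
}
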
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
index 844e66e..badd2f6 100644
--- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
+++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
@@ -25,9 +25,7 @@
#include "mlir/IR/Builders.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/Support/Debug.h"
#include <optional>
@@ -84,8 +82,8 @@ static Operation::operand_range getUpperBoundOperands(AffineForOp forOp) {
// Get a Value that corresponds to the loop step. If the step is an attribute,
// materialize a corresponding constant using builder.
static Value getOrCreateStep(AffineForOp forOp, OpBuilder &builder) {
- return builder.create<arith::ConstantIndexOp>(forOp.getLoc(),
- forOp.getStepAsInt());
+ return arith::ConstantIndexOp::create(builder, forOp.getLoc(),
+ forOp.getStepAsInt());
}
// Get a Value for the loop lower bound. If the value requires computation,
@@ -190,12 +188,12 @@ AffineLoopToGpuConverter::collectBounds(AffineForOp forOp, unsigned numLoops) {
return std::nullopt;
}
- Value range = builder.create<arith::SubIOp>(currentLoop.getLoc(),
- upperBound, lowerBound);
+ Value range = arith::SubIOp::create(builder, currentLoop.getLoc(),
+ upperBound, lowerBound);
Value step = getOrCreateStep(currentLoop, builder);
if (getConstantIntValue(step) != static_cast<int64_t>(1))
- range =
- builder.create<arith::CeilDivSIOp>(currentLoop.getLoc(), range, step);
+ range = arith::CeilDivSIOp::create(builder, currentLoop.getLoc(), range,
+ step);
dims.push_back(range);
lbs.push_back(lowerBound);
@@ -221,7 +219,7 @@ void AffineLoopToGpuConverter::createLaunch(AffineForOp rootForOp,
// no loop mapped to a specific dimension, use constant "1" as its size.
Value constOne =
(numBlockDims < 3 || numThreadDims < 3)
- ? builder.create<arith::ConstantIndexOp>(rootForOp.getLoc(), 1)
+ ? arith::ConstantIndexOp::create(builder, rootForOp.getLoc(), 1)
: nullptr;
Value gridSizeX = numBlockDims > 0 ? dims[0] : constOne;
Value gridSizeY = numBlockDims > 1 ? dims[1] : constOne;
@@ -232,9 +230,9 @@ void AffineLoopToGpuConverter::createLaunch(AffineForOp rootForOp,
// Create a launch op and move the body region of the innermost loop to the
// launch op.
- auto launchOp = builder.create<gpu::LaunchOp>(
- rootForOp.getLoc(), gridSizeX, gridSizeY, gridSizeZ, blockSizeX,
- blockSizeY, blockSizeZ);
+ auto launchOp =
+ gpu::LaunchOp::create(builder, rootForOp.getLoc(), gridSizeX, gridSizeY,
+ gridSizeZ, blockSizeX, blockSizeY, blockSizeZ);
// Replace the loop terminator (loops contain only a single block) with the
// gpu terminator and move the operations from the loop body block to the gpu
@@ -244,7 +242,7 @@ void AffineLoopToGpuConverter::createLaunch(AffineForOp rootForOp,
Location terminatorLoc = terminator.getLoc();
terminator.erase();
builder.setInsertionPointToEnd(innermostForOp.getBody());
- builder.create<gpu::TerminatorOp>(terminatorLoc, TypeRange());
+ gpu::TerminatorOp::create(builder, terminatorLoc, TypeRange());
launchOp.getBody().front().getOperations().splice(
launchOp.getBody().front().begin(),
innermostForOp.getBody()->getOperations());
@@ -263,10 +261,10 @@ void AffineLoopToGpuConverter::createLaunch(AffineForOp rootForOp,
: getDim3Value(launchOp.getThreadIds(), en.index() - numBlockDims);
Value step = steps[en.index()];
if (getConstantIntValue(step) != static_cast<int64_t>(1))
- id = builder.create<arith::MulIOp>(rootForOp.getLoc(), step, id);
+ id = arith::MulIOp::create(builder, rootForOp.getLoc(), step, id);
Value ivReplacement =
- builder.create<arith::AddIOp>(rootForOp.getLoc(), *lbArgumentIt, id);
+ arith::AddIOp::create(builder, rootForOp.getLoc(), *lbArgumentIt, id);
en.value().replaceAllUsesWith(ivReplacement);
std::advance(lbArgumentIt, 1);
std::advance(stepArgumentIt, 1);
@@ -319,8 +317,8 @@ static Value deriveStaticUpperBound(Value upperBound,
if (auto minOp = upperBound.getDefiningOp<AffineMinOp>()) {
for (const AffineExpr &result : minOp.getMap().getResults()) {
if (auto constExpr = dyn_cast<AffineConstantExpr>(result)) {
- return rewriter.create<arith::ConstantIndexOp>(minOp.getLoc(),
- constExpr.getValue());
+ return arith::ConstantIndexOp::create(rewriter, minOp.getLoc(),
+ constExpr.getValue());
}
}
}
@@ -344,8 +342,8 @@ static Value deriveStaticUpperBound(Value upperBound,
if ((lhs.value() < 0) != (rhs.value() < 0))
return {};
- return rewriter.create<arith::ConstantIndexOp>(
- multiplyOp.getLoc(), lhs.value() * rhs.value());
+ return arith::ConstantIndexOp::create(rewriter, multiplyOp.getLoc(),
+ lhs.value() * rhs.value());
}
}
@@ -422,8 +420,8 @@ static LogicalResult processParallelLoop(
if (launchIndependent(val))
return val;
if (auto constOp = val.getDefiningOp<arith::ConstantOp>())
- return rewriter.create<arith::ConstantOp>(constOp.getLoc(),
- constOp.getValue());
+ return arith::ConstantOp::create(rewriter, constOp.getLoc(),
+ constOp.getValue());
return {};
};
@@ -453,8 +451,8 @@ static LogicalResult processParallelLoop(
1, 2,
rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0) +
rewriter.getAffineSymbolExpr(1));
- newIndex = rewriter.create<AffineApplyOp>(
- loc, annotation.getMap().compose(lowerAndStep),
+ newIndex = AffineApplyOp::create(
+ rewriter, loc, annotation.getMap().compose(lowerAndStep),
ValueRange{operand, ensureLaunchIndependent(step),
ensureLaunchIndependent(lowerBound)});
// If there was also a bound, insert that, too.
@@ -498,8 +496,8 @@ static LogicalResult processParallelLoop(
1, 2,
((rewriter.getAffineDimExpr(0) - rewriter.getAffineSymbolExpr(0))
.ceilDiv(rewriter.getAffineSymbolExpr(1))));
- Value launchBound = rewriter.create<AffineApplyOp>(
- loc, annotation.getBound().compose(stepMap),
+ Value launchBound = AffineApplyOp::create(
+ rewriter, loc, annotation.getBound().compose(stepMap),
ValueRange{
ensureLaunchIndependent(
cloningMap.lookupOrDefault(upperBound)),
@@ -517,10 +515,10 @@ static LogicalResult processParallelLoop(
if (!boundIsPrecise) {
// We are using an approximation, create a surrounding conditional.
Value originalBound = std::get<3>(config);
- arith::CmpIOp pred = rewriter.create<arith::CmpIOp>(
- loc, arith::CmpIPredicate::slt, newIndex,
+ arith::CmpIOp pred = arith::CmpIOp::create(
+ rewriter, loc, arith::CmpIPredicate::slt, newIndex,
cloningMap.lookupOrDefault(originalBound));
- scf::IfOp ifOp = rewriter.create<scf::IfOp>(loc, pred, false);
+ scf::IfOp ifOp = scf::IfOp::create(rewriter, loc, pred, false);
rewriter.setInsertionPointToStart(&ifOp.getThenRegion().front());
// Put a sentinel into the worklist so we know when to pop out of the
// if body again. We use the launchOp here, as that cannot be part of
@@ -530,10 +528,10 @@ static LogicalResult processParallelLoop(
}
} else {
// Create a sequential for loop.
- auto loopOp = rewriter.create<scf::ForOp>(
- loc, cloningMap.lookupOrDefault(lowerBound),
- cloningMap.lookupOrDefault(upperBound),
- cloningMap.lookupOrDefault(step));
+ auto loopOp = scf::ForOp::create(rewriter, loc,
+ cloningMap.lookupOrDefault(lowerBound),
+ cloningMap.lookupOrDefault(upperBound),
+ cloningMap.lookupOrDefault(step));
newIndex = loopOp.getInductionVar();
rewriter.setInsertionPointToStart(loopOp.getBody());
// Put a sentinel into the worklist so we know when to pop out of the loop
@@ -608,12 +606,12 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
// sizes. Those will be refined later as we discover them from mappings.
Location loc = parallelOp.getLoc();
Value constantOne =
- rewriter.create<arith::ConstantIndexOp>(parallelOp.getLoc(), 1);
- gpu::LaunchOp launchOp = rewriter.create<gpu::LaunchOp>(
- parallelOp.getLoc(), constantOne, constantOne, constantOne, constantOne,
- constantOne, constantOne);
+ arith::ConstantIndexOp::create(rewriter, parallelOp.getLoc(), 1);
+ gpu::LaunchOp launchOp = gpu::LaunchOp::create(
+ rewriter, parallelOp.getLoc(), constantOne, constantOne, constantOne,
+ constantOne, constantOne, constantOne);
rewriter.setInsertionPointToEnd(&launchOp.getBody().front());
- rewriter.create<gpu::TerminatorOp>(loc);
+ gpu::TerminatorOp::create(rewriter, loc);
rewriter.setInsertionPointToStart(&launchOp.getBody().front());
IRMapping cloningMap;
@@ -667,7 +665,7 @@ ParallelToGpuLaunchLowering::matchAndRewrite(ParallelOp parallelOp,
if (externalValues.size())
return failure();
// Replace by gpu.all_reduce.
- auto gpuRedOp = rewriter.create<gpu::AllReduceOp>(loc, newValue);
+ auto gpuRedOp = gpu::AllReduceOp::create(rewriter, loc, newValue);
cloningMap.map(parentLoop->getResult(0), gpuRedOp.getResult());
// Copy region.
rewriter.inlineRegionBefore(reduceOp.getRegion(0), gpuRedOp.getRegion(),