aboutsummaryrefslogtreecommitdiff
path: root/mlir/lib/Dialect/XeGPU
diff options
context:
space:
mode:
Diffstat (limited to 'mlir/lib/Dialect/XeGPU')
-rw-r--r-- mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 2
-rw-r--r-- mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp | 27
-rw-r--r-- mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp | 31
3 files changed, 32 insertions, 28 deletions
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 1599ae9..24e9095 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -736,7 +736,7 @@ OpFoldResult genBinOp(OpFoldResult a, OpFoldResult b, Location loc,
OpBuilder &builder) {
auto aVal = getValueOrCreateConstantIndexOp(builder, loc, a);
auto bVal = getValueOrCreateConstantIndexOp(builder, loc, b);
- return builder.create<ArithOp>(loc, aVal, bVal).getResult();
+ return ArithOp::create(builder, loc, aVal, bVal).getResult();
}
// a helper utility to perform division operation on OpFoldResult and int64_t.
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
index 26770b3..d09dc19 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp
@@ -1505,14 +1505,19 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
return AffineMap::get(val.getContext());
// Get the layout of the vector type.
xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(val);
- // If no layout is specified, assume the inner most dimension is distributed
- // for now.
+ // If no layout is specified, that means no distribution.
if (!layout)
- return AffineMap::getMultiDimMapWithTargets(
- vecRank, {static_cast<unsigned int>(vecRank - 1)}, val.getContext());
+ return AffineMap::getMultiDimMapWithTargets(vecRank, {},
+ val.getContext());
+ // Expecting vector and layout rank to match.
+ assert(layout.getRank() == vecRank &&
+ "Expecting vector and layout rank to match");
+ // A dimension is distributed only if layout suggests there are
+ // multiple lanes assigned for this dimension and the shape can be evenly
+ // distributed to those lanes.
SmallVector<unsigned int> distributedDims;
for (auto [i, v] : llvm::enumerate(layout.getEffectiveLaneLayoutAsInt())) {
- if (v > 1)
+ if (v > 1 && vecType.getShape()[i] % v == 0)
distributedDims.push_back(i);
}
return AffineMap::getMultiDimMapWithTargets(vecRank, distributedDims,
@@ -1525,15 +1530,13 @@ void XeGPUSubgroupDistributePass::runOnOperation() {
auto warpReduction = [](Location loc, OpBuilder &builder, Value input,
vector::CombiningKind kind, uint32_t size) {
// First reduce on a single thread to get per lane reduction value.
- Value laneVal = builder.create<vector::ReductionOp>(loc, kind, input);
+ Value laneVal = vector::ReductionOp::create(builder, loc, kind, input);
// Parallel reduction using butterfly shuffles.
for (uint64_t i = 1; i < size; i <<= 1) {
- Value shuffled =
- builder
- .create<gpu::ShuffleOp>(loc, laneVal, i,
- /*width=*/size,
- /*mode=*/gpu::ShuffleMode::XOR)
- .getShuffleResult();
+ Value shuffled = gpu::ShuffleOp::create(builder, loc, laneVal, i,
+ /*width=*/size,
+ /*mode=*/gpu::ShuffleMode::XOR)
+ .getShuffleResult();
laneVal = makeArithReduction(builder, loc, kind, laneVal, shuffled);
}
return laneVal;
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 31a967d..9fc5ad9 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -825,7 +825,7 @@ struct WgToSgArithConstantOp : public OpConversionPattern<arith::ConstantOp> {
auto tileAttr = DenseElementsAttr::get(VectorType::get(sgShape, eltType),
baseTileValues);
- auto baseConstVec = rewriter.create<arith::ConstantOp>(loc, tileAttr);
+ auto baseConstVec = arith::ConstantOp::create(rewriter, loc, tileAttr);
// Get subgroup id
Value sgId =
@@ -837,25 +837,26 @@ struct WgToSgArithConstantOp : public OpConversionPattern<arith::ConstantOp> {
SmallVector<Value, 2> strideConsts;
strideConsts.push_back(
- rewriter.create<arith::ConstantIndexOp>(loc, colStride));
+ arith::ConstantIndexOp::create(rewriter, loc, colStride));
if (rows > 1)
strideConsts.insert(
strideConsts.begin(),
- rewriter.create<arith::ConstantIndexOp>(loc, rowStride));
+ arith::ConstantIndexOp::create(rewriter, loc, rowStride));
SmallVector<Value> newConstOps;
for (auto offsets : *sgOffsets) {
// Multiply offset with stride, broadcast it and add to baseConstVec
- Value mulOffset = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+ Value mulOffset = arith::ConstantIndexOp::create(rewriter, loc, 0);
for (size_t i = 0; i < strideConsts.size(); ++i) {
- Value mul = rewriter.create<arith::MulIOp>(
- loc, rewriter.getIndexType(), offsets[i], strideConsts[i]);
- mulOffset = rewriter.create<arith::AddIOp>(
- loc, rewriter.getIndexType(), mulOffset, mul);
+ Value mul =
+ arith::MulIOp::create(rewriter, loc, rewriter.getIndexType(),
+ offsets[i], strideConsts[i]);
+ mulOffset = arith::AddIOp::create(
+ rewriter, loc, rewriter.getIndexType(), mulOffset, mul);
}
// Broadcast to baseConstVec size
- auto bcastOffset = rewriter.create<vector::BroadcastOp>(
- loc, baseConstVec.getType(), mulOffset);
+ auto bcastOffset = vector::BroadcastOp::create(
+ rewriter, loc, baseConstVec.getType(), mulOffset);
auto finalConst =
arith::AddIOp::create(rewriter, loc, baseConstVec, bcastOffset);
setLayoutIfNeeded(baseConstVec);
@@ -1138,8 +1139,8 @@ struct WgToSgVectorShapeCastOp
SmallVector<Value> newShapeCastOps;
for (auto src : adaptor.getSource()) {
- auto newShapeCast =
- rewriter.create<vector::ShapeCastOp>(op.getLoc(), newResultType, src);
+ auto newShapeCast = vector::ShapeCastOp::create(rewriter, op.getLoc(),
+ newResultType, src);
if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
!layout.getEffectiveInstDataAsInt().empty())
xegpu::setDistributeLayoutAttr(newShapeCast->getResult(0),
@@ -1201,9 +1202,9 @@ struct WgToSgMultiDimReductionOp
SmallVector<Value> newReductions;
for (auto sgSrc : adaptor.getSource()) {
- auto newOp = rewriter.create<vector::MultiDimReductionOp>(
- op.getLoc(), newDstType, op.getKind(), sgSrc, adaptor.getAcc()[0],
- op.getReductionDims());
+ auto newOp = vector::MultiDimReductionOp::create(
+ rewriter, op.getLoc(), newDstType, op.getKind(), sgSrc,
+ adaptor.getAcc()[0], op.getReductionDims());
if (!layout.getEffectiveLaneLayoutAsInt().empty() ||
!layout.getEffectiveInstDataAsInt().empty())
xegpu::setDistributeLayoutAttr(newOp->getResult(0),