author     harsh-nod <harsh@nod-labs.com>  2023-12-07 15:01:55 -0800
committer  GitHub <noreply@github.com>     2023-12-07 15:01:55 -0800
commit     42bba97fc24f045f593fc26f998bac9b08633255 (patch)
tree       60f7e8d79159156bebffe4c510704726a6af53d1 /mlir
parent     bfd41c3f8cc70bd65461a6d767f55c14d72150d9 (diff)
[mlir] Extend CombineTransferReadOpTranspose pattern to handle extf ops. (#74754)
This patch extends the CombineTransferReadOpTranspose pattern to handle arith.extf ops, in addition to the arith.extsi and arith.extui ops it already supported. It also adds a test showing the transpose getting folded into the transfer_read.
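For reference, a minimal before/after sketch of the rewrite, condensed from the new test below (the SSA value and memref names are illustrative). Before the pattern runs, the transpose sits above the extf:

  %0 = vector.transfer_read %mem[%c0, %c0], %cst {in_bounds = [true, true]}
         : memref<64x128xf16>, vector<64x128xf16>
  %1 = arith.extf %0 : vector<64x128xf16> to vector<64x128xf32>
  %2 = vector.transpose %1, [1, 0] : vector<64x128xf32> to vector<128x64xf32>

After folding, the transpose becomes a permutation_map on the transfer_read, and the extf is recreated on top of the transposed read:

  %0 = vector.transfer_read %mem[%c0, %c0], %cst
         {in_bounds = [true, true], permutation_map = affine_map<(d0, d1) -> (d1, d0)>}
         : memref<64x128xf16>, vector<128x64xf16>
  %1 = arith.extf %0 : vector<128x64xf16> to vector<128x64xf32>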
Diffstat (limited to 'mlir')
-rw-r--r--  mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp           8
-rw-r--r--  mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir  30
2 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
index 429d113..f151011 100644
--- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
+++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp
@@ -455,7 +455,8 @@ struct CombineTransferReadOpTranspose final
Type resultType = op.getType();
Operation *extOp;
if ((extOp = source.getDefiningOp<arith::ExtSIOp>()) ||
- (extOp = source.getDefiningOp<arith::ExtUIOp>())) {
+ (extOp = source.getDefiningOp<arith::ExtUIOp>()) ||
+ (extOp = source.getDefiningOp<arith::ExtFOp>())) {
source = extOp->getOperand(0);
resultType =
VectorType::get(cast<VectorType>(resultType).getShape(),
@@ -493,9 +494,12 @@ struct CombineTransferReadOpTranspose final
if (isa<arith::ExtSIOp>(extOp))
result = rewriter.create<arith::ExtSIOp>(loc, op.getType(), result)
.getResult();
- else
+ else if (isa<arith::ExtUIOp>(extOp))
result = rewriter.create<arith::ExtUIOp>(loc, op.getType(), result)
.getResult();
+ else
+ result = rewriter.create<arith::ExtFOp>(loc, op.getType(), result)
+ .getResult();
}
rewriter.replaceOp(op, result);
diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
index fa9fff2..962ed7d 100644
--- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
+++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir
@@ -460,3 +460,33 @@ func.func @cast_f16_to_f32_write(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf
vector.transfer_write %cast, %arg3[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf32>, memref<16x16xf32>
return
}
+
+// -----
+
+#map1 = affine_map<(d0, d1, d2) -> (d0, d2)>
+#map2 = affine_map<(d0, d1, d2) -> (d2, d1)>
+#map3 = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+// CHECK-DAG: #[[$MAP:.+]] = affine_map<(d0, d1) -> (d1, d0)>
+// CHECK-LABEL: func @fold_transpose_into_transfer_read(
+// CHECK-SAME: %[[ALLOC:.+]]: memref<64x128xf16>
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[CST:.+]] = arith.constant 0.000000e+00 : f16
+// CHECK: %[[READ:.+]] = vector.transfer_read %[[ALLOC]][%[[C0]], %[[C0]]], %[[CST]] {in_bounds = [true, true], permutation_map = #[[$MAP]]}
+// CHECK: %[[EXTF1:.+]] = arith.extf %[[READ]]
+// CHECK-NOT: vector.transpose
+// CHECK: %[[RESULT:.+]] = vector.contract
+func.func @fold_transpose_into_transfer_read(%alloc: memref<64x128xf16>, %vector: vector<32x128xf16>, %alloc2: memref<32x64xf32>) {
+ %c0 = arith.constant 0 : index
+ %cst = arith.constant 0.000000e+00 : f16
+ %init = arith.constant dense<0.000000e+00> : vector<32x64xf32>
+ %0 = vector.transfer_read %alloc[%c0, %c0], %cst {in_bounds = [true, true]} : memref<64x128xf16>, vector<64x128xf16>
+ %1 = arith.extf %0 : vector<64x128xf16> to vector<64x128xf32>
+ %2 = arith.extf %vector : vector<32x128xf16> to vector<32x128xf32>
+ %3 = vector.transpose %1, [1, 0] : vector<64x128xf32> to vector<128x64xf32>
+ %4 = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>} %2, %3, %init : vector<32x128xf32>, vector<128x64xf32> into vector<32x64xf32>
+ vector.transfer_write %4, %alloc2[%c0, %c0] {in_bounds = [true, true]} : vector<32x64xf32>, memref<32x64xf32>
+ return
+}
+
+// -----