aboutsummaryrefslogtreecommitdiff
path: root/mlir/test
diff options
context:
space:
mode:
Diffstat (limited to 'mlir/test')
-rw-r--r--mlir/test/CAPI/llvm.c3
-rw-r--r--mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir81
-rw-r--r--mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir15
-rw-r--r--mlir/test/Dialect/Linalg/canonicalize.mlir55
-rw-r--r--mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir40
-rw-r--r--mlir/test/Dialect/Linalg/invalid.mlir10
-rw-r--r--mlir/test/Dialect/Mesh/spmdization.mlir14
-rw-r--r--mlir/test/Dialect/Tosa/ops.mlir14
-rw-r--r--mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir82
-rw-r--r--mlir/test/Dialect/Vector/vector-transfer-flatten.mlir38
-rw-r--r--mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir9
-rw-r--r--mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir9
-rw-r--r--mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir8
-rw-r--r--mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul.mlir6
-rw-r--r--mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir7
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir6
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir6
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-multi-tile-transpose.mlir8
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f16f16f32.mlir10
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir6
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir6
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-i8i8i32.mlir8
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir6
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir7
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir6
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir6
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir6
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir5
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-scalable-interleave.mlir25
-rw-r--r--mlir/test/Integration/Dialect/Vector/CPU/test-interleave.mlir24
-rw-r--r--mlir/test/Target/LLVMIR/Import/import-failure.ll40
-rw-r--r--mlir/test/lib/Dialect/ArmSME/CMakeLists.txt18
-rw-r--r--mlir/test/lib/Dialect/ArmSME/TestLowerToArmSME.cpp99
-rw-r--r--mlir/test/lib/Dialect/CMakeLists.txt1
-rw-r--r--mlir/test/lib/Dialect/Test/TestPatterns.cpp14
35 files changed, 481 insertions, 217 deletions
diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c
index 5a78fac..1817988 100644
--- a/mlir/test/CAPI/llvm.c
+++ b/mlir/test/CAPI/llvm.c
@@ -15,6 +15,7 @@
#include "mlir-c/Support.h"
#include <assert.h>
+#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@@ -105,7 +106,7 @@ static int testStructTypeCreation(MlirContext ctx) {
// CHECK: i8
// CHECK: i32
// CHECK: i64
- fprintf(stderr, "num elements: %ld\n",
+ fprintf(stderr, "num elements: %" PRIdPTR "\n",
mlirLLVMStructTypeGetNumElementTypes(literal));
for (intptr_t i = 0; i < 3; ++i) {
mlirTypeDump(mlirLLVMStructTypeGetElementType(literal, i));
diff --git a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir
index 3de2f11d..56129db 100644
--- a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir
+++ b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir
@@ -77,6 +77,18 @@ func.func @log1p_2dvector_fmf(%arg0 : vector<4x3xf32>) {
// -----
+// CHECK-LABEL: func @log1p_scalable_vector(
+// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32>
+func.func @log1p_scalable_vector(%arg0 : vector<[4]xf32>) -> vector<[4]xf32> {
+ // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32>
+ // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %[[VEC]] : vector<[4]xf32>
+ // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) : (vector<[4]xf32>) -> vector<[4]xf32>
+ %0 = math.log1p %arg0 : vector<[4]xf32>
+ func.return %0 : vector<[4]xf32>
+}
+
+// -----
+
// CHECK-LABEL: func @expm1(
// CHECK-SAME: f32
func.func @expm1(%arg0 : f32) {
@@ -113,6 +125,18 @@ func.func @expm1_vector(%arg0 : vector<4xf32>) {
// -----
+// CHECK-LABEL: func @expm1_scalable_vector(
+// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32>
+func.func @expm1_scalable_vector(%arg0 : vector<[4]xf32>) -> vector<[4]xf32> {
+ // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32>
+ // CHECK: %[[EXP:.*]] = llvm.intr.exp(%[[VEC]]) : (vector<[4]xf32>) -> vector<[4]xf32>
+ // CHECK: %[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] : vector<[4]xf32>
+ %0 = math.expm1 %arg0 : vector<[4]xf32>
+ func.return %0 : vector<[4]xf32>
+}
+
+// -----
+
// CHECK-LABEL: func @expm1_vector_fmf(
// CHECK-SAME: vector<4xf32>
func.func @expm1_vector_fmf(%arg0 : vector<4xf32>) {
@@ -177,6 +201,16 @@ func.func @cttz_vec(%arg0 : vector<4xi32>) {
// -----
+// CHECK-LABEL: func @cttz_scalable_vec(
+// CHECK-SAME: %[[VEC:.*]]: vector<[4]xi32>
+func.func @cttz_scalable_vec(%arg0 : vector<[4]xi32>) -> vector<[4]xi32> {
+ // CHECK: "llvm.intr.cttz"(%[[VEC]]) <{is_zero_poison = false}> : (vector<[4]xi32>) -> vector<[4]xi32>
+ %0 = math.cttz %arg0 : vector<[4]xi32>
+ func.return %0 : vector<[4]xi32>
+}
+
+// -----
+
// CHECK-LABEL: func @ctpop(
// CHECK-SAME: i32
func.func @ctpop(%arg0 : i32) {
@@ -197,6 +231,16 @@ func.func @ctpop_vector(%arg0 : vector<3xi32>) {
// -----
+// CHECK-LABEL: func @ctpop_scalable_vector(
+// CHECK-SAME: %[[VEC:.*]]: vector<[4]xi32>
+func.func @ctpop_scalable_vector(%arg0 : vector<[4]xi32>) -> vector<[4]xi32> {
+ // CHECK: llvm.intr.ctpop(%[[VEC]]) : (vector<[4]xi32>) -> vector<[4]xi32>
+ %0 = math.ctpop %arg0 : vector<[4]xi32>
+ func.return %0 : vector<[4]xi32>
+}
+
+// -----
+
// CHECK-LABEL: func @rsqrt_double(
// CHECK-SAME: f64
func.func @rsqrt_double(%arg0 : f64) {
@@ -233,6 +277,18 @@ func.func @rsqrt_vector(%arg0 : vector<4xf32>) {
// -----
+// CHECK-LABEL: func @rsqrt_scalable_vector(
+// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32>
+func.func @rsqrt_scalable_vector(%arg0 : vector<[4]xf32>) -> vector<[4]xf32>{
+ // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32>
+ // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%[[VEC]]) : (vector<[4]xf32>) -> vector<[4]xf32>
+ // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<[4]xf32>
+ %0 = math.rsqrt %arg0 : vector<[4]xf32>
+ func.return %0 : vector<[4]xf32>
+}
+
+// -----
+
// CHECK-LABEL: func @rsqrt_vector_fmf(
// CHECK-SAME: vector<4xf32>
func.func @rsqrt_vector_fmf(%arg0 : vector<4xf32>) {
@@ -245,6 +301,18 @@ func.func @rsqrt_vector_fmf(%arg0 : vector<4xf32>) {
// -----
+// CHECK-LABEL: func @rsqrt_scalable_vector_fmf(
+// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32>
+func.func @rsqrt_scalable_vector_fmf(%arg0 : vector<[4]xf32>) -> vector<[4]xf32> {
+ // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32>
+ // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%[[VEC]]) {fastmathFlags = #llvm.fastmath<fast>} : (vector<[4]xf32>) -> vector<[4]xf32>
+ // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] {fastmathFlags = #llvm.fastmath<fast>} : vector<[4]xf32>
+ %0 = math.rsqrt %arg0 fastmath<fast> : vector<[4]xf32>
+ func.return %0 : vector<[4]xf32>
+}
+
+// -----
+
// CHECK-LABEL: func @rsqrt_multidim_vector(
func.func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) {
// CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<3xf32>>
@@ -258,6 +326,19 @@ func.func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) {
// -----
+// CHECK-LABEL: func @rsqrt_multidim_scalable_vector(
+func.func @rsqrt_multidim_scalable_vector(%arg0 : vector<4x[4]xf32>) -> vector<4x[4]xf32> {
+ // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<[4]xf32>>
+ // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32>
+ // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%[[EXTRACT]]) : (vector<[4]xf32>) -> vector<[4]xf32>
+ // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<[4]xf32>
+ // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[DIV]], %{{.*}}[0] : !llvm.array<4 x vector<[4]xf32>>
+ %0 = math.rsqrt %arg0 : vector<4x[4]xf32>
+ func.return %0 : vector<4x[4]xf32>
+}
+
+// -----
+
// CHECK-LABEL: func @fpowi(
// CHECK-SAME: f64
func.func @fpowi(%arg0 : f64, %arg1 : i32) {
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index febe74e..1fa783f 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -759,6 +759,21 @@ func.func @test_i8(%arg0: tensor<1xi8>) -> () {
// -----
+// CHECK-LABEL: @test_i64
+func.func @test_i64(%arg0: tensor<1xi64>) -> () {
+ // CHECK: linalg.generic
+ // CHECK: ^bb0(%[[ARG1:.+]]: i64,
+ // CHECK-DAG: %[[C127:.+]] = arith.constant -9223372036854775808
+ // CHECK-DAG: %[[C126:.+]] = arith.constant 9223372036854775807
+ // CHECK-DAG: %[[LOWER:.+]] = arith.maxsi %[[C127]], %[[ARG1]]
+ // CHECK-DAG: %[[CLAMPED:.+]] = arith.minsi %[[C126]], %[[LOWER]]
+ %0 = tosa.clamp %arg0 {min_int = -9223372036854775808 : i64, max_int = 9223372036854775807 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi64>) -> tensor<1xi64>
+
+ return
+}
+
+// -----
+
// CHECK-LABEL: @test_clamp_f16
func.func @test_clamp_f16(%arg0: tensor<1xf16>) -> () {
// CHECK: linalg.generic
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 7adde31..206d7e9 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -102,17 +102,16 @@ func.func @tensor.cast.unranked(%a : tensor<*xf32>, %b : tensor<*xf32>, %c : ten
// -----
// CHECK-LABEL: func @linalg_effects(
-// CHECK-SAME: %[[A:[a-z0-9]*]]: tensor<?x?xf32>
-// CHECK-SAME: %[[B:[a-z0-9]*]]: memref<?x?xf32>
-// CHECK-SAME: %[[C:[a-z0-9]*]]: tensor<?x?xf32>
-func.func @linalg_effects(%a : tensor<?x?xf32>, %b : memref<?x?xf32>, %c : tensor<?x?xf32>) {
+func.func @linalg_effects(
+ %a : tensor<?x?xf32>, %b : tensor<?x?xf32>, %c : tensor<?x?xf32>,
+ %d : memref<?x?xf32>, %e : memref<?x?xf32>, %f : memref<?x?xf32>) {
// CHECK-NOT: %{{.*}} = linalg.matmul
- %t = linalg.matmul ins(%a, %b : tensor<?x?xf32>, memref<?x?xf32>)
+ %t = linalg.matmul ins(%a, %b : tensor<?x?xf32>, tensor<?x?xf32>)
outs(%c : tensor<?x?xf32>) -> tensor<?x?xf32>
// CHECK: linalg.matmul
- linalg.matmul ins(%a, %c : tensor<?x?xf32>, tensor<?x?xf32>)
- outs(%b : memref<?x?xf32>)
+ linalg.matmul ins(%d, %e : memref<?x?xf32>, memref<?x?xf32>)
+ outs(%f : memref<?x?xf32>)
return
}
@@ -889,11 +888,11 @@ func.func @fold_multi_use_generic_op_with_consumer(%arg0 : tensor<?x?x?xf32>) ->
// -----
#map = affine_map<(d0) -> (d0)>
-func.func @identity_mixed(%arg0 : tensor<?xf32>, %arg1: memref<?xf32>) {
+func.func @identity_buffer(%arg0 : memref<?xf32>, %arg1: memref<?xf32>) {
linalg.generic {
indexing_maps = [#map, #map],
iterator_types = ["parallel"]
- } ins(%arg0 : tensor<?xf32>)
+ } ins(%arg0 : memref<?xf32>)
outs(%arg1 : memref<?xf32>) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
@@ -901,14 +900,13 @@ func.func @identity_mixed(%arg0 : tensor<?xf32>, %arg1: memref<?xf32>) {
return
}
-// There was a crash in EraseIdentityGenericOp for generic with mixed semantics.
-// For now, check generic remained unchanged.
-// CHECK-LABEL: func @identity_mixed
-// CHECK-SAME: (%[[ARG1:.*]]: tensor<?xf32>, %[[ARG2:.*]]: memref<?xf32>)
+// Do not erase ops with buffer semantics.
+// CHECK-LABEL: func @identity_buffer
+// CHECK-SAME: (%[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: memref<?xf32>)
// CHECK: linalg.generic {
// CHECK-SAME: indexing_maps = [#map, #map],
// CHECK-SAME: iterator_types = ["parallel"]
-// CHECK-SAME: } ins(%[[ARG1]] : tensor<?xf32>)
+// CHECK-SAME: } ins(%[[ARG1]] : memref<?xf32>)
// CHECK-SAME: outs(%[[ARG2]] : memref<?xf32>) {
// -----
@@ -916,12 +914,12 @@ func.func @identity_mixed(%arg0 : tensor<?xf32>, %arg1: memref<?xf32>) {
// Just make sure that we don't crash.
// CHECK-LABEL: func @dedeplicate_regression_test
-func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: memref<4xf32>) {
+func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: tensor<4xf32>) {
%36 = linalg.generic
{indexing_maps = [affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- ins(%1, %1 : memref<4xf32>, memref<4xf32>)
+ ins(%1, %1 : tensor<4xf32>, tensor<4xf32>)
outs(%0 : tensor<4xf32>) {
^bb0(%in: f32, %in_24: f32, %out: f32):
linalg.yield %in : f32
@@ -937,31 +935,6 @@ func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: memref<4xf32>) {
// -----
-#map = affine_map<(d0) -> (d0)>
-func.func @cast_producer_mixed(%arg0 : tensor<5xf32>, %arg1: memref<?xf32>) {
- %0 = tensor.cast %arg0 : tensor<5xf32> to tensor<?xf32>
- linalg.generic {
- indexing_maps = [#map, #map],
- iterator_types = ["parallel"]
- } ins(%0 : tensor<?xf32>)
- outs(%arg1 : memref<?xf32>) {
- ^bb0(%arg2 : f32, %arg3 : f32):
- linalg.yield %arg2 : f32
- }
- return
-}
-
-// We need a mixed linalg as a bridge between tensor and memref worlds.
-// CHECK-LABEL: func @cast_producer_mixed
-// CHECK-SAME: (%[[ARG1:.*]]: tensor<5xf32>, %[[ARG2:.*]]: memref<?xf32>)
-// CHECK: linalg.generic {
-// CHECK-SAME: indexing_maps = [#map, #map],
-// CHECK-SAME: iterator_types = ["parallel"]
-// CHECK-SAME: } ins(%[[ARG1]] : tensor<5xf32>)
-// CHECK-SAME: outs(%[[ARG2]] : memref<?xf32>) {
-
-// -----
-
// CHECK-LABEL: dead_softmax
func.func @dead_softmax(%arg0: tensor<16x64x256xf32>) -> tensor<16x64x256xf32> {
%0 = tensor.empty() : tensor<16x64x256xf32>
diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
index 9d8421c..15a4f6c 100644
--- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
@@ -1110,43 +1110,3 @@ module {
// CHECK-DAG: %[[T3:.+]] = arith.addf %[[T2]], %[[B1]]
// CHECK: linalg.yield %[[T3]] : f32
// CHECK: return %[[GENERIC]]
-
-// -----
-
-// CHECK-DAG: [[$MAP0:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0, d1)>
-#map0 = affine_map<(d0, d1) -> (d0, d1)>
-
-// CHECK-LABEL: @mixed_fusion
-func.func @mixed_fusion(%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %arg2 : tensor<?x?xf32>, %arg8 : memref<?x?xf32>)
-{
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
- %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
- %2 = tensor.empty(%0, %1) : tensor<?x?xf32>
- %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
- ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>)
- outs(%2 : tensor<?x?xf32>) {
- ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
- %4 = arith.addf %arg3, %arg4 : f32
- linalg.yield %4 : f32
- } -> tensor<?x?xf32>
- // CHECK: linalg.generic {
- // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}}
- linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]}
- ins(%3, %arg2 : tensor<?x?xf32>, tensor<?x?xf32>)
- outs(%arg8 : memref<?x?xf32>) {
- // CHECK: ^{{[a-zA-Z0-9_]*}}
- // CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]]
- // CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]]
- // CHECK-SAME: [[ARG2:%[a-zA-Z0-9_]*]]
- ^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
- // CHECK: [[T1:%[a-zA-Z0-9_]*]] = arith.addf [[ARG0]], [[ARG1]]
- // CHECK-NOT: linalg.yield
- // CHECK: arith.mulf [[T1]], [[ARG2]]
- // CHECK: linalg.yield
- %5 = arith.mulf %arg5, %arg6 : f32
- linalg.yield %5 : f32
- }
- return
-}
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index 916c04f..44c81c3 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -770,3 +770,13 @@ func.func @mmt4d_rank_mismatch(%A: tensor<16x16x8x1xf32>,
-> tensor<8x8xf32>
return %res : tensor<8x8xf32>
}
+
+// -----
+
+func.func @mixed_semantics(%a: tensor<?x?xf32>, %b: tensor<?x?xf32>, %c: memref<?x?xf32>) {
+ // expected-error @+1 {{expected to have pure tensor or buffer semantics}}
+ linalg.matmul ins(%a, %b: tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%c: memref<?x?xf32>)
+ return
+}
+
diff --git a/mlir/test/Dialect/Mesh/spmdization.mlir b/mlir/test/Dialect/Mesh/spmdization.mlir
index 2fb8029..572d3eb 100644
--- a/mlir/test/Dialect/Mesh/spmdization.mlir
+++ b/mlir/test/Dialect/Mesh/spmdization.mlir
@@ -127,3 +127,17 @@ func.func @multiple_chained_ops(
// CHECK: return %[[RESHARD3]] : tensor<1xi8>
return %7 : tensor<2xi8>
}
+
+// CHECK-LABEL: func @incomplete_sharding
+func.func @incomplete_sharding(
+ // CHECK-SAME: %[[ARG:.*]]: tensor<4x16xf32>
+ %arg0: tensor<8x16xf32>
+// CHECK-SAME: -> tensor<4x16xf32> {
+) -> tensor<8x16xf32> {
+ %0 = mesh.shard %arg0 to <@mesh_1d, [[0]]> annotate_for_users : tensor<8x16xf32>
+ // CHECK: %[[RES:.*]] = tosa.sigmoid %[[ARG]] : (tensor<4x16xf32>) -> tensor<4x16xf32>
+ %1 = tosa.sigmoid %0 : (tensor<8x16xf32>) -> tensor<8x16xf32>
+ %2 = mesh.shard %1 to <@mesh_1d, [[0]]> : tensor<8x16xf32>
+ // CHECK: return %[[RES]] : tensor<4x16xf32>
+ return %2 : tensor<8x16xf32>
+}
diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir
index 3d68464..01b2707 100644
--- a/mlir/test/Dialect/Tosa/ops.mlir
+++ b/mlir/test/Dialect/Tosa/ops.mlir
@@ -376,6 +376,13 @@ func.func @test_clz(%arg0: tensor<13x21x3xi32>) -> tensor<13x21x3xi32> {
}
// -----
+// CHECK-LABEL: cos
+func.func @test_cos(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
+ %0 = tosa.cos %arg0 : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32>
+ return %0 : tensor<13x21x3xf32>
+}
+
+// -----
// CHECK-LABEL: exp
func.func @test_exp(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
%0 = tosa.exp %arg0 : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32>
@@ -425,6 +432,13 @@ func.func @test_rsqrt(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
}
// -----
+// CHECK-LABEL: sin
+func.func @test_sin(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
+ %0 = tosa.sin %arg0 : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32>
+ return %0 : tensor<13x21x3xf32>
+}
+
+// -----
// CHECK-LABEL: select
func.func @test_select(%arg0: tensor<1x1x1xi1>, %arg1: tensor<13x21x3xf32>, %arg2: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> {
%0 = tosa.select %arg0, %arg1, %arg2 : (tensor<1x1x1xi1>, tensor<13x21x3xf32>, tensor<13x21x3xf32>) -> tensor<13x21x3xf32>
diff --git a/mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir b/mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir
index 02063a8..94e78ce 100644
--- a/mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir
+++ b/mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir
@@ -195,53 +195,89 @@ func.func @f3ext(%a: vector<5xi8>) -> vector<8xi17> {
// CHECK-LABEL: func.func @aligned_extsi(
func.func @aligned_extsi(%a: vector<8xi4>) -> vector<8xi32> {
- // CHECK: arith.shli
- // CHECK: arith.shrsi
- // CHECK: arith.shrsi
- // CHECK: vector.shuffle
- // CHECK: arith.extsi %{{.*}} : vector<8xi8> to vector<8xi32>
+// CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xi32> {
+// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<4xi8>
+// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8xi4> to vector<4xi8>
+// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
+// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<4xi8>
+// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
+// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<4xi8>
+// CHECK: %[[I32:.*]] = arith.extsi %[[INTERLEAVE]] : vector<8xi8> to vector<8xi32>
%0 = arith.extsi %a : vector<8xi4> to vector<8xi32>
return %0 : vector<8xi32>
}
+// CHECK-LABEL: func.func @aligned_extsi_2d(
+func.func @aligned_extsi_2d(%a: vector<8x32xi4>) -> vector<8x32xi32> {
+// CHECK-SAME: %[[IN:.*]]: vector<8x32xi4>) -> vector<8x32xi32> {
+// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<8x16xi8>
+// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8x32xi4> to vector<8x16xi8>
+// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8>
+// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<8x16xi8>
+// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8>
+// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<8x16xi8>
+// CHECK: %[[I32:.*]] = arith.extsi %[[INTERLEAVE]] : vector<8x32xi8> to vector<8x32xi32>
+ %0 = arith.extsi %a : vector<8x32xi4> to vector<8x32xi32>
+ return %0 : vector<8x32xi32>
+}
+
// CHECK-LABEL: func.func @aligned_extsi_base_case(
func.func @aligned_extsi_base_case(%a: vector<8xi4>) -> vector<8xi8> {
- // CHECK: arith.shli
- // CHECK: arith.shrsi
- // CHECK: arith.shrsi
- // CHECK: vector.shuffle
- // CHECK-NOT: arith.extsi
+// CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xi8> {
+// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<4xi8>
+// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8xi4> to vector<4xi8>
+// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
+// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<4xi8>
+// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
+// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<4xi8>
%0 = arith.extsi %a : vector<8xi4> to vector<8xi8>
return %0 : vector<8xi8>
}
// CHECK-LABEL: func.func @aligned_sitofp(
func.func @aligned_sitofp(%a: vector<8xi4>) -> vector<8xf32> {
- // CHECK: arith.shli
- // CHECK: arith.shrsi
- // CHECK: arith.shrsi
- // CHECK: shuffle
- // CHECK: arith.sitofp %{{.*}} : vector<8xi8> to vector<8xf32>
+// CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xf32> {
+// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<4xi8>
+// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8xi4> to vector<4xi8>
+// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
+// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<4xi8>
+// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<4xi8>
+// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<4xi8>
+// CHECK: %[[F32:.*]] = arith.sitofp %[[INTERLEAVE]] : vector<8xi8> to vector<8xf32>
%0 = arith.sitofp %a : vector<8xi4> to vector<8xf32>
return %0 : vector<8xf32>
}
+// CHECK-LABEL: func.func @aligned_sitofp_2d(
+func.func @aligned_sitofp_2d(%a: vector<8x32xi4>) -> vector<8x32xf32> {
+// CHECK-SAME: %[[IN:.*]]: vector<8x32xi4>) -> vector<8x32xf32> {
+// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<8x16xi8>
+// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8x32xi4> to vector<8x16xi8>
+// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8>
+// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<8x16xi8>
+// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8>
+// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<8x16xi8>
+// CHECK: %[[F32:.*]] = arith.sitofp %[[INTERLEAVE]] : vector<8x32xi8> to vector<8x32xf32>
+ %0 = arith.sitofp %a : vector<8x32xi4> to vector<8x32xf32>
+ return %0 : vector<8x32xf32>
+}
+
// CHECK-LABEL: func.func @i4_transpose(
-// CHECK-SAME: %[[A:[0-9a-z]*]]
func.func @i4_transpose(%a: vector<8x16xi4>) -> vector<16x8xi4> {
- // CHECK: %[[EXT:.*]] = arith.extsi %[[A]] : vector<8x16xi4> to vector<8x16xi8>
- // CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8>
- // CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi4>
+// CHECK-SAME: %[[IN:.*]]: vector<8x16xi4>) -> vector<16x8xi4> {
+// CHECK: %[[EXT:.*]] = vector.interleave
+// CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8>
+// CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi4>
%0 = vector.transpose %a, [1, 0] : vector<8x16xi4> to vector<16x8xi4>
return %0 : vector<16x8xi4>
}
// CHECK-LABEL: func.func @i7_transpose(
-// CHECK-SAME: %[[A:[0-9a-z]*]]
func.func @i7_transpose(%a: vector<8x16xi7>) -> vector<16x8xi7> {
- // CHECK: %[[EXT:.*]] = arith.extsi %[[A]] : vector<8x16xi7> to vector<8x16xi8>
- // CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8>
- // CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi7>
+// CHECK-SAME: %[[IN:.*]]: vector<8x16xi7>) -> vector<16x8xi7> {
+// CHECK: %[[EXT:.*]] = arith.extsi %[[IN]] : vector<8x16xi7> to vector<8x16xi8>
+// CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8>
+// CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi7>
%0 = vector.transpose %a, [1, 0] : vector<8x16xi7> to vector<16x8xi7>
return %0 : vector<16x8xi7>
}
diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
index 1775b5f..788ae9a 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir
@@ -83,7 +83,7 @@ func.func @transfer_read_dims_mismatch_non_zero_indices(
return
}
-// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 * 43)>
+// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0, s1] -> (s0 * 24 + s1 * 6)>
// CHECK-LABEL: func.func @transfer_read_dims_mismatch_non_zero_indices(
// CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
@@ -92,7 +92,7 @@ func.func @transfer_read_dims_mismatch_non_zero_indices(
// CHECK: %[[C_0:.*]] = arith.constant 0 : i32
// CHECK: %[[C_0_IDX:.*]] = arith.constant 0 : index
// CHECK: %[[COLLAPSED_IN:.*]] = memref.collapse_shape %[[M_IN]] {{\[}}[0], [1, 2, 3]] : memref<1x43x4x6xi32> into memref<1x1032xi32>
-// CHECK: %[[COLLAPSED_IDX:.*]] = affine.apply #[[$ATTR_0]]()[%[[IDX_2]], %[[IDX_1]]]
+// CHECK: %[[COLLAPSED_IDX:.*]] = affine.apply #[[$ATTR_0]]()[%[[IDX_1]], %[[IDX_2]]]
// CHECK: %[[READ:.*]] = vector.transfer_read %[[COLLAPSED_IN]][%[[C_0_IDX]], %[[COLLAPSED_IDX]]], %[[C_0]] {in_bounds = [true]} : memref<1x1032xi32>, vector<12xi32>
// CHECK: %[[COLLAPSED_OUT:.*]] = memref.collapse_shape %[[M_OUT]] {{\[}}[0, 1, 2]] : memref<1x2x6xi32> into memref<12xi32>
// CHECK: vector.transfer_write %[[READ]], %[[COLLAPSED_OUT]][%[[C_0_IDX]]] {in_bounds = [true]} : vector<12xi32>, memref<12xi32>
@@ -459,3 +459,37 @@ func.func @fold_unit_dims_entirely(%arg0 : vector<8xi32>,
// CHECK-128B-LABEL: func @fold_unit_dims_entirely(
// CHECK-128B-NOT: memref.collapse_shape
+
+// -----
+
+func.func @regression_non_contiguous_dim_read(%subview : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>,
+ %idx0 : index, %idx1 : index) -> vector<2x2xf32> {
+ %c0 = arith.constant 0 : index
+ %cst_1 = arith.constant 0.000000e+00 : f32
+ %8 = vector.transfer_read %subview[%c0, %idx0, %idx1, %c0], %cst_1 {in_bounds = [true, true]} : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>, vector<2x2xf32>
+ return %8 : vector<2x2xf32>
+}
+
+// CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 * 2)>
+// CHECK-LABEL: func.func @regression_non_contiguous_dim_read(
+// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>> into memref<1x3x6xf32, strided<[40, 10, 1], offset: ?>>
+// CHECK: %[[APPLY:.*]] = affine.apply #[[$MAP]]()
+
+// CHECK-128B-LABEL: func @regression_non_contiguous_dim_read(
+// CHECK-128B: memref.collapse_shape
+
+// -----
+
+func.func @unsupported_non_contiguous_dim_write(%value : vector<2x2xf32>,
+ %subview : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>,
+ %idx0 : index, %idx1 : index) {
+ %c0 = arith.constant 0 : index
+ vector.transfer_write %value, %subview[%c0, %idx0, %idx1, %c0] {in_bounds = [true, true]} : vector<2x2xf32>, memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>
+ return
+}
+
+// CHECK-LABEL: func.func @unsupported_non_contiguous_dim_write(
+// CHECK-NOT: memref.collapse_shape
+
+// CHECK-128B-LABEL: func @unsupported_non_contiguous_dim_write(
+// CHECK-128B-NOT: memref.collapse_shape
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir
index 44ff1af..12f13e8 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir
@@ -1,13 +1,8 @@
// RUN: mlir-opt %s \
-// RUN: -transform-interpreter \
-// RUN: -test-transform-dialect-erase-schedule \
+// RUN: -transform-interpreter -test-transform-dialect-erase-schedule \
// RUN: -lower-vector-mask \
// RUN: -one-shot-bufferize="bufferize-function-boundaries" \
-// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
-// RUN: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \
-// RUN: -allocate-arm-sme-tiles -convert-arm-sme-to-scf \
-// RUN: -convert-arm-sme-to-llvm -cse -canonicalize \
-// RUN: -test-lower-to-llvm | \
+// RUN: -test-lower-to-arm-sme -test-lower-to-llvm | \
// RUN: %mcr_aarch64_cmd \
// RUN: -e=entry -entry-point-result=void \
// RUN: -march=aarch64 -mattr="+sve,+sme" \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
index c781d5e..34c5351 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
@@ -1,12 +1,7 @@
// RUN: mlir-opt %s \
// RUN: -transform-interpreter -test-transform-dialect-erase-schedule \
-// RUN: -one-shot-bufferize="bufferize-function-boundaries" -canonicalize \
-// RUN: -convert-vector-to-arm-sme -allocate-arm-sme-tiles -convert-arm-sme-to-scf \
-// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \
-// RUN: -convert-vector-to-scf -cse -arm-sve-legalize-vector-storage \
-// RUN: -convert-arm-sme-to-llvm \
-// RUN: -convert-vector-to-llvm=enable-arm-sve \
-// RUN: -cse -canonicalize -test-lower-to-llvm | \
+// RUN: -one-shot-bufferize="bufferize-function-boundaries" \
+// RUN: -test-lower-to-arm-sme -test-lower-to-llvm | \
// RUN: %mcr_aarch64_cmd \
// RUN: -e=main -entry-point-result=void \
// RUN: -march=aarch64 -mattr="+sve,+sme" \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir
index 31c3202..2bfdaa8 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir
@@ -1,12 +1,6 @@
// RUN: mlir-opt %s \
// RUN: -transform-interpreter -test-transform-dialect-erase-schedule \
-// RUN: -canonicalize \
-// RUN: -convert-vector-to-arm-sme -allocate-arm-sme-tiles -convert-arm-sme-to-scf \
-// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \
-// RUN: -convert-vector-to-scf -cse -arm-sve-legalize-vector-storage \
-// RUN: -convert-arm-sme-to-llvm \
-// RUN: -convert-vector-to-llvm=enable-arm-sve \
-// RUN: -cse -canonicalize -test-lower-to-llvm | \
+// RUN: -test-lower-to-arm-sme -test-lower-to-llvm | \
// RUN: %mcr_aarch64_cmd \
// RUN: -e=main -entry-point-result=void \
// RUN: -march=aarch64 -mattr="+sve,+sme" \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul.mlir
index d5c3506..e376bdd 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul.mlir
@@ -1,11 +1,7 @@
// RUN: mlir-opt %s \
// RUN: -transform-interpreter -test-transform-dialect-erase-schedule \
// RUN: -one-shot-bufferize="bufferize-function-boundaries" -canonicalize \
-// RUN: -arm-sme-vector-legalization -canonicalize -cse \
-// RUN: -convert-vector-to-arm-sme -allocate-arm-sme-tiles -convert-arm-sme-to-scf \
-// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \
-// RUN: -convert-vector-to-scf=full-unroll -convert-arm-sme-to-llvm \
-// RUN: -test-lower-to-llvm | \
+// RUN: -test-lower-to-arm-sme -test-lower-to-llvm | \
// RUN: %mcr_aarch64_cmd \
// RUN: -e=main -entry-point-result=void \
// RUN: -march=aarch64 -mattr="+sve,+sme" \
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir
index 42fe21c..ee3866de 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir
@@ -1,10 +1,5 @@
// RUN: mlir-opt %s \
-// RUN: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \
-// RUN: -allocate-arm-sme-tiles -convert-arm-sme-to-scf \
-// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \
-// RUN: -convert-vector-to-scf -cse -arm-sve-legalize-vector-storage \
-// RUN: -convert-arm-sme-to-llvm -convert-vector-to-llvm=enable-arm-sve -cse \
-// RUN: -canonicalize -test-lower-to-llvm -verify-diagnostics | \
+// RUN: -test-lower-to-arm-sme -test-lower-to-llvm -verify-diagnostics | \
// RUN: %mcr_aarch64_cmd \
// RUN: -e=main -entry-point-result=void \
// RUN: -march=aarch64 -mattr="+sve,+sme" \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir
index 59b4a7e..06b1c10 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir
@@ -1,9 +1,5 @@
// DEFINE: %{entry_point} = test_load_store_zaq0
-// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
-// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm
+// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir
index 064141c..27be801 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir
@@ -1,9 +1,5 @@
// DEFINE: %{entry_point} = entry
-// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
-// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -test-lower-to-llvm
+// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-multi-tile-transpose.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-multi-tile-transpose.mlir
index 0827d9b..9d836d9 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-multi-tile-transpose.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-multi-tile-transpose.mlir
@@ -1,10 +1,4 @@
-// RUN: mlir-opt %s -arm-sme-vector-legalization -cse -canonicalize \
-// RUN: -convert-vector-to-arm-sme -allocate-arm-sme-tiles -convert-arm-sme-to-scf \
-// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \
-// RUN: -convert-vector-to-scf -cse -arm-sve-legalize-vector-storage \
-// RUN: -convert-arm-sme-to-llvm \
-// RUN: -convert-vector-to-llvm=enable-arm-sve \
-// RUN: -cse -canonicalize -test-lower-to-llvm | \
+// RUN: mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm | \
// RUN: %mcr_aarch64_cmd \
// RUN: -e=main -entry-point-result=void \
// RUN: -march=aarch64 -mattr="+sve,+sme" \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f16f16f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f16f16f32.mlir
index f081838..a06ad37 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f16f16f32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f16f16f32.mlir
@@ -1,11 +1,7 @@
+// DEFINE: %{opts} =
// DEFINE: %{entry} = main
-// DEFINE: %{fusion_opts} = -arm-sme-outer-product-fusion
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme %{fusion_opts} \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \
-// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -test-lower-to-llvm -o %t
+// DEFINE: -test-lower-to-arm-sme=%{opts} -test-lower-to-llvm -o %t
// DEFINE: %{run} = %mcr_aarch64_cmd %t \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry} -entry-point-result=void \
@@ -18,7 +14,7 @@
// Check result is the same when outerproducts are not combined into widening
// variant.
-// REDEFINE: %{fusion_opts} =
+// REDEFINE: %{opts} = fuse-outer-products=false
// RUN: %{run} | FileCheck %s
func.func @main() {
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir
index 5f41b37..7e7869d 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir
@@ -1,10 +1,6 @@
// DEFINE: %{entry_point} = test_outerproduct_no_accumulator_4x4xf32
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
-// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \
-// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -test-lower-to-llvm -o %t
+// DEFINE: -test-lower-to-arm-sme -test-lower-to-llvm -o %t
// DEFINE: %{run} = %mcr_aarch64_cmd %t \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir
index a1bb9b7..46bf799 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir
@@ -1,10 +1,6 @@
// DEFINE: %{entry_point} = test_outerproduct_no_accumulator_2x2xf64
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
-// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \
-// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -test-lower-to-llvm -o %t
+// DEFINE: -test-lower-to-arm-sme -test-lower-to-llvm -o %t
// DEFINE: %{run} = %mcr_aarch64_cmd %t \
// DEFINE: -march=aarch64 -mattr=+sve,+sme-f64f64 \
// DEFINE: -e %{entry_point} -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-i8i8i32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-i8i8i32.mlir
index 1770e57..9a353ec 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-i8i8i32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-i8i8i32.mlir
@@ -1,11 +1,5 @@
// DEFINE: %{entry} = main
-// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \
-// DEFINE: -arm-sme-outer-product-fusion \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \
-// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -test-lower-to-llvm
+// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry} -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir
index 6e028d5..52f5688 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir
@@ -1,9 +1,5 @@
// DEFINE: %{entry_point} = entry
-// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -test-lower-to-llvm
+// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir
index c0c1f55..710cc66 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir
@@ -1,10 +1,5 @@
// DEFINE: %{entry_point} = entry
-// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \
-// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -test-lower-to-llvm
+// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir
index eee3c56..88bc0d0 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir
@@ -1,9 +1,5 @@
// DEFINE: %{entry_point} = entry
-// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
-// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -test-lower-to-llvm
+// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir
index 223bc8c..e149174 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir
@@ -1,8 +1,4 @@
-// RUN: mlir-opt %s -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
-// RUN: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \
-// RUN: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// RUN: -convert-arm-sme-to-llvm -cse -canonicalize \
-// RUN: -test-lower-to-llvm | \
+// RUN: mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm | \
// RUN: %mcr_aarch64_cmd \
// RUN: -march=aarch64 -mattr=+sve,+sme \
// RUN: -e entry -entry-point-result=i32 \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir
index 2f151e2..b29790db 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir
@@ -1,9 +1,5 @@
// DEFINE: %{entry_point} = za0_d_f64
-// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
-// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \
-// DEFINE: -test-lower-to-llvm
+// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=i32 \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
index f28bf19..c8c401b 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
@@ -1,8 +1,5 @@
// DEFINE: %{entry_point} = entry
-// DEFINE: %{compile} = mlir-opt %s -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
-// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \
-// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \
-// DEFINE: -convert-arm-sme-to-llvm -test-lower-to-llvm
+// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=i32 \
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-scalable-interleave.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-scalable-interleave.mlir
new file mode 100644
index 0000000..07989bd
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-scalable-interleave.mlir
@@ -0,0 +1,25 @@
+// RUN: mlir-opt %s -test-lower-to-llvm | \
+// RUN: %mcr_aarch64_cmd -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_c_runner_utils,%mlir_arm_runner_utils \
+// RUN: -march=aarch64 -mattr=+sve | \
+// RUN: FileCheck %s
+
+func.func @entry() {
+ %f1 = arith.constant 1.0 : f32
+ %f2 = arith.constant 2.0 : f32
+ %v1 = vector.splat %f1 : vector<[4]xf32>
+ %v2 = vector.splat %f2 : vector<[4]xf32>
+ vector.print %v1 : vector<[4]xf32>
+ vector.print %v2 : vector<[4]xf32>
+ //
+ // Test vectors:
+ //
+ // CHECK: ( 1, 1, 1, 1
+ // CHECK: ( 2, 2, 2, 2
+
+ %v3 = vector.interleave %v1, %v2 : vector<[4]xf32>
+ vector.print %v3 : vector<[8]xf32>
+ // CHECK: ( 1, 2, 1, 2, 1, 2, 1, 2
+
+ return
+}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-interleave.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-interleave.mlir
new file mode 100644
index 0000000..0bc78af
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-interleave.mlir
@@ -0,0 +1,24 @@
+// RUN: mlir-opt %s -test-lower-to-llvm | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_c_runner_utils | \
+// RUN: FileCheck %s
+
+func.func @entry() {
+ %f1 = arith.constant 1.0 : f32
+ %f2 = arith.constant 2.0 : f32
+ %v1 = vector.splat %f1 : vector<2x4xf32>
+ %v2 = vector.splat %f2 : vector<2x4xf32>
+ vector.print %v1 : vector<2x4xf32>
+ vector.print %v2 : vector<2x4xf32>
+ //
+ // Test vectors:
+ //
+ // CHECK: ( ( 1, 1, 1, 1 ), ( 1, 1, 1, 1 ) )
+ // CHECK: ( ( 2, 2, 2, 2 ), ( 2, 2, 2, 2 ) )
+
+ %v3 = vector.interleave %v1, %v2 : vector<2x4xf32>
+ vector.print %v3 : vector<2x8xf32>
+ // CHECK: ( ( 1, 2, 1, 2, 1, 2, 1, 2 ), ( 1, 2, 1, 2, 1, 2, 1, 2 ) )
+
+ return
+}
diff --git a/mlir/test/Target/LLVMIR/Import/import-failure.ll b/mlir/test/Target/LLVMIR/Import/import-failure.ll
index 0962134..9a4e939 100644
--- a/mlir/test/Target/LLVMIR/Import/import-failure.ll
+++ b/mlir/test/Target/LLVMIR/Import/import-failure.ll
@@ -59,13 +59,15 @@ define void @unhandled_intrinsic() gc "example" {
; // -----
+; Check that debug intrinsics with an unsupported argument are dropped.
+
declare void @llvm.dbg.value(metadata, metadata, metadata)
; CHECK: import-failure.ll
-; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata !DIArgList(i64 %arg1, i64 undef), metadata !3, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value)), !dbg !5
+; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata !DIArgList(i64 %{{.*}}, i64 undef), metadata !3, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value))
; CHECK: import-failure.ll
-; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata !6, metadata !3, metadata !DIExpression()), !dbg !5
-define void @dropped_instruction(i64 %arg1) {
+; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata !6, metadata !3, metadata !DIExpression())
+define void @unsupported_argument(i64 %arg1) {
call void @llvm.dbg.value(metadata !DIArgList(i64 %arg1, i64 undef), metadata !3, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value)), !dbg !5
call void @llvm.dbg.value(metadata !6, metadata !3, metadata !DIExpression()), !dbg !5
ret void
@@ -83,6 +85,38 @@ define void @dropped_instruction(i64 %arg1) {
; // -----
+; Check that debug intrinsics that depend on cyclic metadata are dropped.
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+; CHECK: import-failure.ll
+; CHECK-SAME: warning: dropped instruction: call void @llvm.dbg.label(metadata !{{.*}})
+; CHECK: import-failure.ll
+; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata i64 %{{.*}}, metadata !3, metadata !DIExpression())
+define void @cylic_metadata(i64 %arg1) {
+ call void @llvm.dbg.value(metadata i64 %arg1, metadata !10, metadata !DIExpression()), !dbg !14
+ call void @llvm.dbg.label(metadata !13), !dbg !14
+ ret void
+}
+
+!llvm.dbg.cu = !{!1}
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2)
+!2 = !DIFile(filename: "import-failure.ll", directory: "/")
+!3 = !DICompositeType(tag: DW_TAG_array_type, size: 42, baseType: !4)
+!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !3)
+!5 = distinct !DISubprogram(name: "class_method", scope: !2, file: !2, type: !6, spFlags: DISPFlagDefinition, unit: !1)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!3}
+!10 = !DILocalVariable(scope: !5, name: "arg1", file: !2, line: 1, arg: 1, align: 64);
+!11 = !DILexicalBlock(scope: !5)
+!12 = !DILexicalBlockFile(scope: !11, discriminator: 0)
+!13 = !DILabel(scope: !12, name: "label", file: !2, line: 42)
+!14 = !DILocation(line: 1, column: 2, scope: !5)
+
+; // -----
+
; global_dtors with non-null data fields cannot be represented in MLIR.
; CHECK: <unknown>
; CHECK-SAME: error: unhandled global variable: @llvm.global_dtors
diff --git a/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt b/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt
new file mode 100644
index 0000000..e942c7b
--- /dev/null
+++ b/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Exclude tests from libMLIR.so
+add_mlir_library(MLIRArmSMETestPasses
+ TestLowerToArmSME.cpp
+
+ EXCLUDE_FROM_LIBMLIR
+
+ LINK_LIBS PUBLIC
+ MLIRArithToArmSME
+ MLIRArmSMEToLLVM
+ MLIRArmSMEToSCF
+ MLIRArmSMETransforms
+ MLIRArmSVETransforms
+ MLIRIR
+ MLIRPass
+ MLIRTransforms
+ MLIRVectorToArmSME
+ MLIRVectorToSCF
+ )
diff --git a/mlir/test/lib/Dialect/ArmSME/TestLowerToArmSME.cpp b/mlir/test/lib/Dialect/ArmSME/TestLowerToArmSME.cpp
new file mode 100644
index 0000000..48d4a58
--- /dev/null
+++ b/mlir/test/lib/Dialect/ArmSME/TestLowerToArmSME.cpp
@@ -0,0 +1,99 @@
+//===- TestLowerToArmSME.cpp - Test lowering to ArmSME as a sink pass -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass for testing the lowering to ArmSME as a
+// generally usable sink pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/ArithToArmSME/ArithToArmSME.h"
+#include "mlir/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.h"
+#include "mlir/Conversion/ArmSMEToSCF/ArmSMEToSCF.h"
+#include "mlir/Conversion/VectorToArmSME/VectorToArmSME.h"
+#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
+#include "mlir/Dialect/ArmSME/Transforms/Passes.h"
+#include "mlir/Dialect/ArmSVE/Transforms/Passes.h"
+#include "mlir/IR/DialectRegistry.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Pass/PassOptions.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+
+namespace {
+struct TestLowerToArmSMEOptions
+ : public PassPipelineOptions<TestLowerToArmSMEOptions> {
+ PassOptions::Option<bool> fuseOuterProducts{
+ *this, "fuse-outer-products",
+ llvm::cl::desc("Fuse outer product operations via "
+ "'-arm-sme-outer-product-fusion' pass"),
+ llvm::cl::init(true)};
+};
+
+void buildTestLowerToArmSME(OpPassManager &pm,
+ const TestLowerToArmSMEOptions &options) {
+ // Legalize vector operations so they can be converted to ArmSME.
+ pm.addPass(arm_sme::createVectorLegalizationPass());
+
+ // Sprinkle some cleanups.
+ pm.addPass(createCanonicalizerPass());
+ pm.addPass(createCSEPass());
+
+ // Passes that convert operations on vectors to ArmSME operations.
+
+ // Convert Arith to ArmSME.
+ pm.addPass(createArithToArmSMEConversionPass());
+ // Convert Vector to ArmSME.
+ pm.addPass(createConvertVectorToArmSMEPass());
+
+ // Fuse outer products.
+ if (options.fuseOuterProducts)
+ pm.addPass(arm_sme::createOuterProductFusionPass());
+
+ // Convert operations on high-level vectors to loops.
+
+ // Convert ArmSME to SCF.
+ pm.addPass(createConvertArmSMEToSCFPass());
+
+ // Convert Vector to SCF (with full unroll enabled).
+ pm.addPass(createConvertVectorToSCFPass(
+ VectorTransferToSCFOptions().enableFullUnroll()));
+
+ // Allocate tiles for ArmSME operations.
+ //
+ // Later passes may create further ArmSME ops that implement the
+ // ArmSMETileOpInterface, but tiles are allocated for root operations,
+ // all of which should now exist.
+ pm.addPass(arm_sme::createTileAllocationPass());
+
+ // Enable streaming-mode and ZA.
+ pm.addPass(arm_sme::createEnableArmStreamingPass(
+ arm_sme::ArmStreamingMode::StreamingLocally, arm_sme::ArmZaMode::NewZA,
+ /*onlyIfRequiredByOps=*/true));
+
+ // Convert ArmSME to LLVM.
+ pm.addPass(createConvertArmSMEToLLVMPass());
+
+ // Sprinkle some cleanups.
+ pm.addPass(createCanonicalizerPass());
+ pm.addPass(createCSEPass());
+}
+} // namespace
+
+namespace mlir {
+namespace test {
+void registerTestLowerToArmSME() {
+ PassPipelineRegistration<TestLowerToArmSMEOptions>(
+ "test-lower-to-arm-sme",
+ "An example pipeline to lower operations on vectors (arith, vector) to "
+ "LLVM via ArmSME.",
+ buildTestLowerToArmSME);
+}
+} // namespace test
+} // namespace mlir
diff --git a/mlir/test/lib/Dialect/CMakeLists.txt b/mlir/test/lib/Dialect/CMakeLists.txt
index 30a17c2..e20cd44 100644
--- a/mlir/test/lib/Dialect/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/CMakeLists.txt
@@ -1,5 +1,6 @@
add_subdirectory(Affine)
add_subdirectory(Arith)
+add_subdirectory(ArmSME)
add_subdirectory(Bufferization)
add_subdirectory(ControlFlow)
add_subdirectory(DLTI)
diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
index 108cfe8..bde4255 100644
--- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp
+++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
@@ -1152,8 +1152,10 @@ struct TestLegalizePatternDriver
// Handle a partial conversion.
if (mode == ConversionMode::Partial) {
DenseSet<Operation *> unlegalizedOps;
- if (failed(applyPartialConversion(
- getOperation(), target, std::move(patterns), &unlegalizedOps))) {
+ ConversionConfig config;
+ config.unlegalizedOps = &unlegalizedOps;
+ if (failed(applyPartialConversion(getOperation(), target,
+ std::move(patterns), config))) {
getOperation()->emitRemark() << "applyPartialConversion failed";
}
// Emit remarks for each legalizable operation.
@@ -1181,8 +1183,10 @@ struct TestLegalizePatternDriver
// Analyze the convertible operations.
DenseSet<Operation *> legalizedOps;
+ ConversionConfig config;
+ config.legalizableOps = &legalizedOps;
if (failed(applyAnalysisConversion(getOperation(), target,
- std::move(patterns), legalizedOps)))
+ std::move(patterns), config)))
return signalPassFailure();
// Emit remarks for each legalizable operation.
@@ -1806,8 +1810,10 @@ struct TestMergeBlocksPatternDriver
});
DenseSet<Operation *> unlegalizedOps;
+ ConversionConfig config;
+ config.unlegalizedOps = &unlegalizedOps;
(void)applyPartialConversion(getOperation(), target, std::move(patterns),
- &unlegalizedOps);
+ config);
for (auto *op : unlegalizedOps)
op->emitRemark() << "op '" << op->getName() << "' is not legalizable";
}