diff options
Diffstat (limited to 'mlir/test')
35 files changed, 481 insertions, 217 deletions
diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c index 5a78fac..1817988 100644 --- a/mlir/test/CAPI/llvm.c +++ b/mlir/test/CAPI/llvm.c @@ -15,6 +15,7 @@ #include "mlir-c/Support.h" #include <assert.h> +#include <inttypes.h> #include <math.h> #include <stdio.h> #include <stdlib.h> @@ -105,7 +106,7 @@ static int testStructTypeCreation(MlirContext ctx) { // CHECK: i8 // CHECK: i32 // CHECK: i64 - fprintf(stderr, "num elements: %ld\n", + fprintf(stderr, "num elements: %" PRIdPTR "\n", mlirLLVMStructTypeGetNumElementTypes(literal)); for (intptr_t i = 0; i < 3; ++i) { mlirTypeDump(mlirLLVMStructTypeGetElementType(literal, i)); diff --git a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir index 3de2f11d..56129db 100644 --- a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir +++ b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir @@ -77,6 +77,18 @@ func.func @log1p_2dvector_fmf(%arg0 : vector<4x3xf32>) { // ----- +// CHECK-LABEL: func @log1p_scalable_vector( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32> +func.func @log1p_scalable_vector(%arg0 : vector<[4]xf32>) -> vector<[4]xf32> { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32> + // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %[[VEC]] : vector<[4]xf32> + // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) : (vector<[4]xf32>) -> vector<[4]xf32> + %0 = math.log1p %arg0 : vector<[4]xf32> + func.return %0 : vector<[4]xf32> +} + +// ----- + // CHECK-LABEL: func @expm1( // CHECK-SAME: f32 func.func @expm1(%arg0 : f32) { @@ -113,6 +125,18 @@ func.func @expm1_vector(%arg0 : vector<4xf32>) { // ----- +// CHECK-LABEL: func @expm1_scalable_vector( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32> +func.func @expm1_scalable_vector(%arg0 : vector<[4]xf32>) -> vector<[4]xf32> { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32> + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%[[VEC]]) : (vector<[4]xf32>) -> vector<[4]xf32> + // CHECK: %[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] : vector<[4]xf32> + %0 = math.expm1 %arg0 : vector<[4]xf32> + func.return %0 : vector<[4]xf32> +} + +// ----- + // CHECK-LABEL: func @expm1_vector_fmf( // CHECK-SAME: vector<4xf32> func.func @expm1_vector_fmf(%arg0 : vector<4xf32>) { @@ -177,6 +201,16 @@ func.func @cttz_vec(%arg0 : vector<4xi32>) { // ----- +// CHECK-LABEL: func @cttz_scalable_vec( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]xi32> +func.func @cttz_scalable_vec(%arg0 : vector<[4]xi32>) -> vector<[4]xi32> { + // CHECK: "llvm.intr.cttz"(%[[VEC]]) <{is_zero_poison = false}> : (vector<[4]xi32>) -> vector<[4]xi32> + %0 = math.cttz %arg0 : vector<[4]xi32> + func.return %0 : vector<[4]xi32> +} + +// ----- + // CHECK-LABEL: func @ctpop( // CHECK-SAME: i32 func.func @ctpop(%arg0 : i32) { @@ -197,6 +231,16 @@ func.func @ctpop_vector(%arg0 : vector<3xi32>) { // ----- +// CHECK-LABEL: func @ctpop_scalable_vector( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]xi32> +func.func @ctpop_scalable_vector(%arg0 : vector<[4]xi32>) -> vector<[4]xi32> { + // CHECK: llvm.intr.ctpop(%[[VEC]]) : (vector<[4]xi32>) -> vector<[4]xi32> + %0 = math.ctpop %arg0 : vector<[4]xi32> + func.return %0 : vector<[4]xi32> +} + +// ----- + // CHECK-LABEL: func @rsqrt_double( // CHECK-SAME: f64 func.func @rsqrt_double(%arg0 : f64) { @@ -233,6 +277,18 @@ func.func @rsqrt_vector(%arg0 : vector<4xf32>) { // ----- +// CHECK-LABEL: func @rsqrt_scalable_vector( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32> +func.func @rsqrt_scalable_vector(%arg0 : vector<[4]xf32>) -> vector<[4]xf32>{ + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32> + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%[[VEC]]) : (vector<[4]xf32>) -> vector<[4]xf32> + // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<[4]xf32> + %0 = math.rsqrt %arg0 : vector<[4]xf32> + func.return %0 : vector<[4]xf32> +} + +// ----- + // CHECK-LABEL: func @rsqrt_vector_fmf( // CHECK-SAME: vector<4xf32> func.func @rsqrt_vector_fmf(%arg0 : vector<4xf32>) { @@ -245,6 +301,18 @@ func.func @rsqrt_vector_fmf(%arg0 : vector<4xf32>) { // ----- +// CHECK-LABEL: func @rsqrt_scalable_vector_fmf( +// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32> +func.func @rsqrt_scalable_vector_fmf(%arg0 : vector<[4]xf32>) -> vector<[4]xf32> { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32> + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%[[VEC]]) {fastmathFlags = #llvm.fastmath<fast>} : (vector<[4]xf32>) -> vector<[4]xf32> + // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] {fastmathFlags = #llvm.fastmath<fast>} : vector<[4]xf32> + %0 = math.rsqrt %arg0 fastmath<fast> : vector<[4]xf32> + func.return %0 : vector<[4]xf32> +} + +// ----- + // CHECK-LABEL: func @rsqrt_multidim_vector( func.func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) { // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> @@ -258,6 +326,19 @@ func.func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) { // ----- +// CHECK-LABEL: func @rsqrt_multidim_scalable_vector( +func.func @rsqrt_multidim_scalable_vector(%arg0 : vector<4x[4]xf32>) -> vector<4x[4]xf32> { + // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<[4]xf32>> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<[4]xf32>) : vector<[4]xf32> + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%[[EXTRACT]]) : (vector<[4]xf32>) -> vector<[4]xf32> + // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<[4]xf32> + // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[DIV]], %{{.*}}[0] : !llvm.array<4 x vector<[4]xf32>> + %0 = math.rsqrt %arg0 : vector<4x[4]xf32> + func.return %0 : vector<4x[4]xf32> +} + +// ----- + // CHECK-LABEL: func @fpowi( // CHECK-SAME: f64 func.func @fpowi(%arg0 : f64, %arg1 : i32) { diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index febe74e..1fa783f 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -759,6 +759,21 @@ func.func @test_i8(%arg0: tensor<1xi8>) -> () { // ----- +// CHECK-LABEL: @test_i64 +func.func @test_i64(%arg0: tensor<1xi64>) -> () { + // CHECK: linalg.generic + // CHECK: ^bb0(%[[ARG1:.+]]: i64, + // CHECK-DAG: %[[C127:.+]] = arith.constant -9223372036854775808 + // CHECK-DAG: %[[C126:.+]] = arith.constant 9223372036854775807 + // CHECK-DAG: %[[LOWER:.+]] = arith.maxsi %[[C127]], %[[ARG1]] + // CHECK-DAG: %[[CLAMPED:.+]] = arith.minsi %[[C126]], %[[LOWER]] + %0 = tosa.clamp %arg0 {min_int = -9223372036854775808 : i64, max_int = 9223372036854775807 : i64, min_fp = 0.0 : f32, max_fp = 0.0 : f32} : (tensor<1xi64>) -> tensor<1xi64> + + return +} + +// ----- + // CHECK-LABEL: @test_clamp_f16 func.func @test_clamp_f16(%arg0: tensor<1xf16>) -> () { // CHECK: linalg.generic diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 7adde31..206d7e9 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -102,17 +102,16 @@ func.func @tensor.cast.unranked(%a : tensor<*xf32>, %b : tensor<*xf32>, %c : ten // ----- // CHECK-LABEL: func @linalg_effects( -// CHECK-SAME: %[[A:[a-z0-9]*]]: tensor<?x?xf32> -// CHECK-SAME: %[[B:[a-z0-9]*]]: memref<?x?xf32> -// CHECK-SAME: %[[C:[a-z0-9]*]]: tensor<?x?xf32> -func.func @linalg_effects(%a : tensor<?x?xf32>, %b : memref<?x?xf32>, %c : tensor<?x?xf32>) { +func.func @linalg_effects( + %a : tensor<?x?xf32>, %b : tensor<?x?xf32>, %c : tensor<?x?xf32>, + %d : memref<?x?xf32>, %e : memref<?x?xf32>, %f : memref<?x?xf32>) { // CHECK-NOT: %{{.*}} = linalg.matmul - %t = linalg.matmul ins(%a, %b : tensor<?x?xf32>, memref<?x?xf32>) + %t = linalg.matmul ins(%a, %b : tensor<?x?xf32>, tensor<?x?xf32>) outs(%c : tensor<?x?xf32>) -> tensor<?x?xf32> // CHECK: linalg.matmul - linalg.matmul ins(%a, %c : tensor<?x?xf32>, tensor<?x?xf32>) - outs(%b : memref<?x?xf32>) + linalg.matmul ins(%d, %e : memref<?x?xf32>, memref<?x?xf32>) + outs(%f : memref<?x?xf32>) return } @@ -889,11 +888,11 @@ func.func @fold_multi_use_generic_op_with_consumer(%arg0 : tensor<?x?x?xf32>) -> // ----- #map = affine_map<(d0) -> (d0)> -func.func @identity_mixed(%arg0 : tensor<?xf32>, %arg1: memref<?xf32>) { +func.func @identity_buffer(%arg0 : memref<?xf32>, %arg1: memref<?xf32>) { linalg.generic { indexing_maps = [#map, #map], iterator_types = ["parallel"] - } ins(%arg0 : tensor<?xf32>) + } ins(%arg0 : memref<?xf32>) outs(%arg1 : memref<?xf32>) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 @@ -901,14 +900,13 @@ func.func @identity_mixed(%arg0 : tensor<?xf32>, %arg1: memref<?xf32>) { return } -// There was a crash in EraseIdentityGenericOp for generic with mixed semantics. -// For now, check generic remained unchanged. -// CHECK-LABEL: func @identity_mixed -// CHECK-SAME: (%[[ARG1:.*]]: tensor<?xf32>, %[[ARG2:.*]]: memref<?xf32>) +// Do not erase ops with buffer semantics. +// CHECK-LABEL: func @identity_buffer +// CHECK-SAME: (%[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: memref<?xf32>) // CHECK: linalg.generic { // CHECK-SAME: indexing_maps = [#map, #map], // CHECK-SAME: iterator_types = ["parallel"] -// CHECK-SAME: } ins(%[[ARG1]] : tensor<?xf32>) +// CHECK-SAME: } ins(%[[ARG1]] : memref<?xf32>) // CHECK-SAME: outs(%[[ARG2]] : memref<?xf32>) { // ----- @@ -916,12 +914,12 @@ func.func @identity_mixed(%arg0 : tensor<?xf32>, %arg1: memref<?xf32>) { // Just make sure that we don't crash. // CHECK-LABEL: func @dedeplicate_regression_test -func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: memref<4xf32>) { +func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: tensor<4xf32>) { %36 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - ins(%1, %1 : memref<4xf32>, memref<4xf32>) + ins(%1, %1 : tensor<4xf32>, tensor<4xf32>) outs(%0 : tensor<4xf32>) { ^bb0(%in: f32, %in_24: f32, %out: f32): linalg.yield %in : f32 @@ -937,31 +935,6 @@ func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: memref<4xf32>) { // ----- -#map = affine_map<(d0) -> (d0)> -func.func @cast_producer_mixed(%arg0 : tensor<5xf32>, %arg1: memref<?xf32>) { - %0 = tensor.cast %arg0 : tensor<5xf32> to tensor<?xf32> - linalg.generic { - indexing_maps = [#map, #map], - iterator_types = ["parallel"] - } ins(%0 : tensor<?xf32>) - outs(%arg1 : memref<?xf32>) { - ^bb0(%arg2 : f32, %arg3 : f32): - linalg.yield %arg2 : f32 - } - return -} - -// We need a mixed linalg as a bridge between tensor and memref worlds. -// CHECK-LABEL: func @cast_producer_mixed -// CHECK-SAME: (%[[ARG1:.*]]: tensor<5xf32>, %[[ARG2:.*]]: memref<?xf32>) -// CHECK: linalg.generic { -// CHECK-SAME: indexing_maps = [#map, #map], -// CHECK-SAME: iterator_types = ["parallel"] -// CHECK-SAME: } ins(%[[ARG1]] : tensor<5xf32>) -// CHECK-SAME: outs(%[[ARG2]] : memref<?xf32>) { - -// ----- - // CHECK-LABEL: dead_softmax func.func @dead_softmax(%arg0: tensor<16x64x256xf32>) -> tensor<16x64x256xf32> { %0 = tensor.empty() : tensor<16x64x256xf32> diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir index 9d8421c..15a4f6c 100644 --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -1110,43 +1110,3 @@ module { // CHECK-DAG: %[[T3:.+]] = arith.addf %[[T2]], %[[B1]] // CHECK: linalg.yield %[[T3]] : f32 // CHECK: return %[[GENERIC]] - -// ----- - -// CHECK-DAG: [[$MAP0:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0, d1)> -#map0 = affine_map<(d0, d1) -> (d0, d1)> - -// CHECK-LABEL: @mixed_fusion -func.func @mixed_fusion(%arg0: tensor<?x?xf32>, %arg1 : tensor<?x?xf32>, %arg2 : tensor<?x?xf32>, %arg8 : memref<?x?xf32>) -{ - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %0 = tensor.dim %arg0, %c0 : tensor<?x?xf32> - %1 = tensor.dim %arg0, %c1 : tensor<?x?xf32> - %2 = tensor.empty(%0, %1) : tensor<?x?xf32> - %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} - ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) - outs(%2 : tensor<?x?xf32>) { - ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): - %4 = arith.addf %arg3, %arg4 : f32 - linalg.yield %4 : f32 - } -> tensor<?x?xf32> - // CHECK: linalg.generic { - // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} - linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} - ins(%3, %arg2 : tensor<?x?xf32>, tensor<?x?xf32>) - outs(%arg8 : memref<?x?xf32>) { - // CHECK: ^{{[a-zA-Z0-9_]*}} - // CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]] - // CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]] - // CHECK-SAME: [[ARG2:%[a-zA-Z0-9_]*]] - ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): - // CHECK: [[T1:%[a-zA-Z0-9_]*]] = arith.addf [[ARG0]], [[ARG1]] - // CHECK-NOT: linalg.yield - // CHECK: arith.mulf [[T1]], [[ARG2]] - // CHECK: linalg.yield - %5 = arith.mulf %arg5, %arg6 : f32 - linalg.yield %5 : f32 - } - return -} diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 916c04f..44c81c3 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -770,3 +770,13 @@ func.func @mmt4d_rank_mismatch(%A: tensor<16x16x8x1xf32>, -> tensor<8x8xf32> return %res : tensor<8x8xf32> } + +// ----- + +func.func @mixed_semantics(%a: tensor<?x?xf32>, %b: tensor<?x?xf32>, %c: memref<?x?xf32>) { + // expected-error @+1 {{expected to have pure tensor or buffer semantics}} + linalg.matmul ins(%a, %b: tensor<?x?xf32>, tensor<?x?xf32>) + outs(%c: memref<?x?xf32>) + return +} + diff --git a/mlir/test/Dialect/Mesh/spmdization.mlir b/mlir/test/Dialect/Mesh/spmdization.mlir index 2fb8029..572d3eb 100644 --- a/mlir/test/Dialect/Mesh/spmdization.mlir +++ b/mlir/test/Dialect/Mesh/spmdization.mlir @@ -127,3 +127,17 @@ func.func @multiple_chained_ops( // CHECK: return %[[RESHARD3]] : tensor<1xi8> return %7 : tensor<2xi8> } + +// CHECK-LABEL: func @incomplete_sharding +func.func @incomplete_sharding( + // CHECK-SAME: %[[ARG:.*]]: tensor<4x16xf32> + %arg0: tensor<8x16xf32> +// CHECK-SAME: -> tensor<4x16xf32> { +) -> tensor<8x16xf32> { + %0 = mesh.shard %arg0 to <@mesh_1d, [[0]]> annotate_for_users : tensor<8x16xf32> + // CHECK: %[[RES:.*]] = tosa.sigmoid %[[ARG]] : (tensor<4x16xf32>) -> tensor<4x16xf32> + %1 = tosa.sigmoid %0 : (tensor<8x16xf32>) -> tensor<8x16xf32> + %2 = mesh.shard %1 to <@mesh_1d, [[0]]> : tensor<8x16xf32> + // CHECK: return %[[RES]] : tensor<4x16xf32> + return %2 : tensor<8x16xf32> +} diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir index 3d68464..01b2707 100644 --- a/mlir/test/Dialect/Tosa/ops.mlir +++ b/mlir/test/Dialect/Tosa/ops.mlir @@ -376,6 +376,13 @@ func.func @test_clz(%arg0: tensor<13x21x3xi32>) -> tensor<13x21x3xi32> { } // ----- +// CHECK-LABEL: cos +func.func @test_cos(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { + %0 = tosa.cos %arg0 : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32> + return %0 : tensor<13x21x3xf32> +} + +// ----- // CHECK-LABEL: exp func.func @test_exp(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = tosa.exp %arg0 : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32> @@ -425,6 +432,13 @@ func.func @test_rsqrt(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { } // ----- +// CHECK-LABEL: sin +func.func @test_sin(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { + %0 = tosa.sin %arg0 : (tensor<13x21x3xf32>) -> tensor<13x21x3xf32> + return %0 : tensor<13x21x3xf32> +} + +// ----- // CHECK-LABEL: select func.func @test_select(%arg0: tensor<1x1x1xi1>, %arg1: tensor<13x21x3xf32>, %arg2: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { %0 = tosa.select %arg0, %arg1, %arg2 : (tensor<1x1x1xi1>, tensor<13x21x3xf32>, tensor<13x21x3xf32>) -> tensor<13x21x3xf32> diff --git a/mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir b/mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir index 02063a8..94e78ce 100644 --- a/mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir +++ b/mlir/test/Dialect/Vector/vector-rewrite-narrow-types.mlir @@ -195,53 +195,89 @@ func.func @f3ext(%a: vector<5xi8>) -> vector<8xi17> { // CHECK-LABEL: func.func @aligned_extsi( func.func @aligned_extsi(%a: vector<8xi4>) -> vector<8xi32> { - // CHECK: arith.shli - // CHECK: arith.shrsi - // CHECK: arith.shrsi - // CHECK: vector.shuffle - // CHECK: arith.extsi %{{.*}} : vector<8xi8> to vector<8xi32> +// CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xi32> { +// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<4xi8> +// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8xi4> to vector<4xi8> +// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<4xi8> +// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<4xi8> +// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<4xi8> +// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<4xi8> +// CHECK: %[[I32:.*]] = arith.extsi %[[INTERLEAVE]] : vector<8xi8> to vector<8xi32> %0 = arith.extsi %a : vector<8xi4> to vector<8xi32> return %0 : vector<8xi32> } +// CHECK-LABEL: func.func @aligned_extsi_2d( +func.func @aligned_extsi_2d(%a: vector<8x32xi4>) -> vector<8x32xi32> { +// CHECK-SAME: %[[IN:.*]]: vector<8x32xi4>) -> vector<8x32xi32> { +// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<8x16xi8> +// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8x32xi4> to vector<8x16xi8> +// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8> +// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<8x16xi8> +// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8> +// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<8x16xi8> +// CHECK: %[[I32:.*]] = arith.extsi %[[INTERLEAVE]] : vector<8x32xi8> to vector<8x32xi32> + %0 = arith.extsi %a : vector<8x32xi4> to vector<8x32xi32> + return %0 : vector<8x32xi32> +} + // CHECK-LABEL: func.func @aligned_extsi_base_case( func.func @aligned_extsi_base_case(%a: vector<8xi4>) -> vector<8xi8> { - // CHECK: arith.shli - // CHECK: arith.shrsi - // CHECK: arith.shrsi - // CHECK: vector.shuffle - // CHECK-NOT: arith.extsi +// CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xi8> { +// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<4xi8> +// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8xi4> to vector<4xi8> +// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<4xi8> +// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<4xi8> +// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<4xi8> +// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<4xi8> %0 = arith.extsi %a : vector<8xi4> to vector<8xi8> return %0 : vector<8xi8> } // CHECK-LABEL: func.func @aligned_sitofp( func.func @aligned_sitofp(%a: vector<8xi4>) -> vector<8xf32> { - // CHECK: arith.shli - // CHECK: arith.shrsi - // CHECK: arith.shrsi - // CHECK: shuffle - // CHECK: arith.sitofp %{{.*}} : vector<8xi8> to vector<8xf32> +// CHECK-SAME: %[[IN:.*]]: vector<8xi4>) -> vector<8xf32> { +// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<4xi8> +// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8xi4> to vector<4xi8> +// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<4xi8> +// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<4xi8> +// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<4xi8> +// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<4xi8> +// CHECK: %[[F32:.*]] = arith.sitofp %[[INTERLEAVE]] : vector<8xi8> to vector<8xf32> %0 = arith.sitofp %a : vector<8xi4> to vector<8xf32> return %0 : vector<8xf32> } +// CHECK-LABEL: func.func @aligned_sitofp_2d( +func.func @aligned_sitofp_2d(%a: vector<8x32xi4>) -> vector<8x32xf32> { +// CHECK-SAME: %[[IN:.*]]: vector<8x32xi4>) -> vector<8x32xf32> { +// CHECK: %[[I4_BITS:.*]] = arith.constant dense<4> : vector<8x16xi8> +// CHECK: %[[BITCAST:.*]] = vector.bitcast %[[IN]] : vector<8x32xi4> to vector<8x16xi8> +// CHECK: %[[SHL_LOW:.*]] = arith.shli %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8> +// CHECK: %[[LOW:.*]] = arith.shrsi %[[SHL_LOW]], %[[I4_BITS]] : vector<8x16xi8> +// CHECK: %[[HIGH:.*]] = arith.shrsi %[[BITCAST]], %[[I4_BITS]] : vector<8x16xi8> +// CHECK: %[[INTERLEAVE:.*]] = vector.interleave %[[LOW]], %[[HIGH]] : vector<8x16xi8> +// CHECK: %[[F32:.*]] = arith.sitofp %[[INTERLEAVE]] : vector<8x32xi8> to vector<8x32xf32> + %0 = arith.sitofp %a : vector<8x32xi4> to vector<8x32xf32> + return %0 : vector<8x32xf32> +} + // CHECK-LABEL: func.func @i4_transpose( -// CHECK-SAME: %[[A:[0-9a-z]*]] func.func @i4_transpose(%a: vector<8x16xi4>) -> vector<16x8xi4> { - // CHECK: %[[EXT:.*]] = arith.extsi %[[A]] : vector<8x16xi4> to vector<8x16xi8> - // CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8> - // CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi4> +// CHECK-SAME: %[[IN:.*]]: vector<8x16xi4>) -> vector<16x8xi4> { +// CHECK: %[[EXT:.*]] = vector.interleave +// CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8> +// CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi4> %0 = vector.transpose %a, [1, 0] : vector<8x16xi4> to vector<16x8xi4> return %0 : vector<16x8xi4> } // CHECK-LABEL: func.func @i7_transpose( -// CHECK-SAME: %[[A:[0-9a-z]*]] func.func @i7_transpose(%a: vector<8x16xi7>) -> vector<16x8xi7> { - // CHECK: %[[EXT:.*]] = arith.extsi %[[A]] : vector<8x16xi7> to vector<8x16xi8> - // CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8> - // CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi7> +// CHECK-SAME: %[[IN:.*]]: vector<8x16xi7>) -> vector<16x8xi7> { +// CHECK: %[[EXT:.*]] = arith.extsi %[[IN]] : vector<8x16xi7> to vector<8x16xi8> +// CHECK: %[[TRANS:.*]] = vector.transpose %[[EXT]], [1, 0] : vector<8x16xi8> to vector<16x8xi8> +// CHECK: %[[TRUNC:.*]] = arith.trunci %[[TRANS]] : vector<16x8xi8> to vector<16x8xi7> %0 = vector.transpose %a, [1, 0] : vector<8x16xi7> to vector<16x8xi7> return %0 : vector<16x8xi7> } diff --git a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir index 1775b5f..788ae9a 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-flatten.mlir @@ -83,7 +83,7 @@ func.func @transfer_read_dims_mismatch_non_zero_indices( return } -// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 * 43)> +// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0, s1] -> (s0 * 24 + s1 * 6)> // CHECK-LABEL: func.func @transfer_read_dims_mismatch_non_zero_indices( // CHECK-SAME: %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index, @@ -92,7 +92,7 @@ func.func @transfer_read_dims_mismatch_non_zero_indices( // CHECK: %[[C_0:.*]] = arith.constant 0 : i32 // CHECK: %[[C_0_IDX:.*]] = arith.constant 0 : index // CHECK: %[[COLLAPSED_IN:.*]] = memref.collapse_shape %[[M_IN]] {{\[}}[0], [1, 2, 3]] : memref<1x43x4x6xi32> into memref<1x1032xi32> -// CHECK: %[[COLLAPSED_IDX:.*]] = affine.apply #[[$ATTR_0]]()[%[[IDX_2]], %[[IDX_1]]] +// CHECK: %[[COLLAPSED_IDX:.*]] = affine.apply #[[$ATTR_0]]()[%[[IDX_1]], %[[IDX_2]]] // CHECK: %[[READ:.*]] = vector.transfer_read %[[COLLAPSED_IN]][%[[C_0_IDX]], %[[COLLAPSED_IDX]]], %[[C_0]] {in_bounds = [true]} : memref<1x1032xi32>, vector<12xi32> // CHECK: %[[COLLAPSED_OUT:.*]] = memref.collapse_shape %[[M_OUT]] {{\[}}[0, 1, 2]] : memref<1x2x6xi32> into memref<12xi32> // CHECK: vector.transfer_write %[[READ]], %[[COLLAPSED_OUT]][%[[C_0_IDX]]] {in_bounds = [true]} : vector<12xi32>, memref<12xi32> @@ -459,3 +459,37 @@ func.func @fold_unit_dims_entirely(%arg0 : vector<8xi32>, // CHECK-128B-LABEL: func @fold_unit_dims_entirely( // CHECK-128B-NOT: memref.collapse_shape + +// ----- + +func.func @regression_non_contiguous_dim_read(%subview : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>, + %idx0 : index, %idx1 : index) -> vector<2x2xf32> { + %c0 = arith.constant 0 : index + %cst_1 = arith.constant 0.000000e+00 : f32 + %8 = vector.transfer_read %subview[%c0, %idx0, %idx1, %c0], %cst_1 {in_bounds = [true, true]} : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>, vector<2x2xf32> + return %8 : vector<2x2xf32> +} + +// CHECK: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 * 2)> +// CHECK-LABEL: func.func @regression_non_contiguous_dim_read( +// CHECK: %[[COLLAPSE:.+]] = memref.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>> into memref<1x3x6xf32, strided<[40, 10, 1], offset: ?>> +// CHECK: %[[APPLY:.*]] = affine.apply #[[$MAP]]() + +// CHECK-128B-LABEL: func @regression_non_contiguous_dim_read( +// CHECK-128B: memref.collapse_shape + +// ----- + +func.func @unsupported_non_contiguous_dim_write(%value : vector<2x2xf32>, + %subview : memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>>, + %idx0 : index, %idx1 : index) { + %c0 = arith.constant 0 : index + vector.transfer_write %value, %subview[%c0, %idx0, %idx1, %c0] {in_bounds = [true, true]} : vector<2x2xf32>, memref<1x3x3x2xf32, strided<[40, 10, 2, 1], offset: ?>> + return +} + +// CHECK-LABEL: func.func @unsupported_non_contiguous_dim_write( +// CHECK-NOT: memref.collapse_shape + +// CHECK-128B-LABEL: func @unsupported_non_contiguous_dim_write( +// CHECK-128B-NOT: memref.collapse_shape diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir index 44ff1af..12f13e8 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir @@ -1,13 +1,8 @@ // RUN: mlir-opt %s \ -// RUN: -transform-interpreter \ -// RUN: -test-transform-dialect-erase-schedule \ +// RUN: -transform-interpreter -test-transform-dialect-erase-schedule \ // RUN: -lower-vector-mask \ // RUN: -one-shot-bufferize="bufferize-function-boundaries" \ -// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \ -// RUN: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \ -// RUN: -allocate-arm-sme-tiles -convert-arm-sme-to-scf \ -// RUN: -convert-arm-sme-to-llvm -cse -canonicalize \ -// RUN: -test-lower-to-llvm | \ +// RUN: -test-lower-to-arm-sme -test-lower-to-llvm | \ // RUN: %mcr_aarch64_cmd \ // RUN: -e=entry -entry-point-result=void \ // RUN: -march=aarch64 -mattr="+sve,+sme" \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir index c781d5e..34c5351 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir @@ -1,12 +1,7 @@ // RUN: mlir-opt %s \ // RUN: -transform-interpreter -test-transform-dialect-erase-schedule \ -// RUN: -one-shot-bufferize="bufferize-function-boundaries" -canonicalize \ -// RUN: -convert-vector-to-arm-sme -allocate-arm-sme-tiles -convert-arm-sme-to-scf \ -// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ -// RUN: -convert-vector-to-scf -cse -arm-sve-legalize-vector-storage \ -// RUN: -convert-arm-sme-to-llvm \ -// RUN: -convert-vector-to-llvm=enable-arm-sve \ -// RUN: -cse -canonicalize -test-lower-to-llvm | \ +// RUN: -one-shot-bufferize="bufferize-function-boundaries" \ +// RUN: -test-lower-to-arm-sme -test-lower-to-llvm | \ // RUN: %mcr_aarch64_cmd \ // RUN: -e=main -entry-point-result=void \ // RUN: -march=aarch64 -mattr="+sve,+sme" \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir index 31c3202..2bfdaa8 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir @@ -1,12 +1,6 @@ // RUN: mlir-opt %s \ // RUN: -transform-interpreter -test-transform-dialect-erase-schedule \ -// RUN: -canonicalize \ -// RUN: -convert-vector-to-arm-sme -allocate-arm-sme-tiles -convert-arm-sme-to-scf \ -// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ -// RUN: -convert-vector-to-scf -cse -arm-sve-legalize-vector-storage \ -// RUN: -convert-arm-sme-to-llvm \ -// RUN: -convert-vector-to-llvm=enable-arm-sve \ -// RUN: -cse -canonicalize -test-lower-to-llvm | \ +// RUN: -test-lower-to-arm-sme -test-lower-to-llvm | \ // RUN: %mcr_aarch64_cmd \ // RUN: -e=main -entry-point-result=void \ // RUN: -march=aarch64 -mattr="+sve,+sme" \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul.mlir index d5c3506..e376bdd 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/multi-tile-matmul.mlir @@ -1,11 +1,7 @@ // RUN: mlir-opt %s \ // RUN: -transform-interpreter -test-transform-dialect-erase-schedule \ // RUN: -one-shot-bufferize="bufferize-function-boundaries" -canonicalize \ -// RUN: -arm-sme-vector-legalization -canonicalize -cse \ -// RUN: -convert-vector-to-arm-sme -allocate-arm-sme-tiles -convert-arm-sme-to-scf \ -// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ -// RUN: -convert-vector-to-scf=full-unroll -convert-arm-sme-to-llvm \ -// RUN: -test-lower-to-llvm | \ +// RUN: -test-lower-to-arm-sme -test-lower-to-llvm | \ // RUN: %mcr_aarch64_cmd \ // RUN: -e=main -entry-point-result=void \ // RUN: -march=aarch64 -mattr="+sve,+sme" \ diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir index 42fe21c..ee3866de 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/use-too-many-tiles.mlir @@ -1,10 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \ -// RUN: -allocate-arm-sme-tiles -convert-arm-sme-to-scf \ -// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ -// RUN: -convert-vector-to-scf -cse -arm-sve-legalize-vector-storage \ -// RUN: -convert-arm-sme-to-llvm -convert-vector-to-llvm=enable-arm-sve -cse \ -// RUN: -canonicalize -test-lower-to-llvm -verify-diagnostics | \ +// RUN: -test-lower-to-arm-sme -test-lower-to-llvm -verify-diagnostics | \ // RUN: %mcr_aarch64_cmd \ // RUN: -e=main -entry-point-result=void \ // RUN: -march=aarch64 -mattr="+sve,+sme" \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir index 59b4a7e..06b1c10 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir @@ -1,9 +1,5 @@ // DEFINE: %{entry_point} = test_load_store_zaq0 -// DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \ -// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm +// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm // DEFINE: %{run} = %mcr_aarch64_cmd \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry_point} -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir index 064141c..27be801 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir @@ -1,9 +1,5 @@ // DEFINE: %{entry_point} = entry -// DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \ -// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -test-lower-to-llvm +// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm // DEFINE: %{run} = %mcr_aarch64_cmd \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry_point} -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-multi-tile-transpose.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-multi-tile-transpose.mlir index 0827d9b..9d836d9 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-multi-tile-transpose.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-multi-tile-transpose.mlir @@ -1,10 +1,4 @@ -// RUN: mlir-opt %s -arm-sme-vector-legalization -cse -canonicalize \ -// RUN: -convert-vector-to-arm-sme -allocate-arm-sme-tiles -convert-arm-sme-to-scf \ -// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ -// RUN: -convert-vector-to-scf -cse -arm-sve-legalize-vector-storage \ -// RUN: -convert-arm-sme-to-llvm \ -// RUN: -convert-vector-to-llvm=enable-arm-sve \ -// RUN: -cse -canonicalize -test-lower-to-llvm | \ +// RUN: mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm | \ // RUN: %mcr_aarch64_cmd \ // RUN: -e=main -entry-point-result=void \ // RUN: -march=aarch64 -mattr="+sve,+sme" \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f16f16f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f16f16f32.mlir index f081838..a06ad37 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f16f16f32.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f16f16f32.mlir @@ -1,11 +1,7 @@ +// DEFINE: %{opts} = // DEFINE: %{entry} = main -// DEFINE: %{fusion_opts} = -arm-sme-outer-product-fusion // DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme %{fusion_opts} \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ -// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -test-lower-to-llvm -o %t +// DEFINE: -test-lower-to-arm-sme=%{opts} -test-lower-to-llvm -o %t // DEFINE: %{run} = %mcr_aarch64_cmd %t \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry} -entry-point-result=void \ @@ -18,7 +14,7 @@ // Check result is the same when outerproducts are not combined into widening // variant. -// REDEFINE: %{fusion_opts} = +// REDEFINE: %{opts} = fuse-outer-products=false // RUN: %{run} | FileCheck %s func.func @main() { diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir index 5f41b37..7e7869d 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir @@ -1,10 +1,6 @@ // DEFINE: %{entry_point} = test_outerproduct_no_accumulator_4x4xf32 // DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \ -// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \ -// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -test-lower-to-llvm -o %t +// DEFINE: -test-lower-to-arm-sme -test-lower-to-llvm -o %t // DEFINE: %{run} = %mcr_aarch64_cmd %t \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry_point} -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir index a1bb9b7..46bf799 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir @@ -1,10 +1,6 @@ // DEFINE: %{entry_point} = test_outerproduct_no_accumulator_2x2xf64 // DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \ -// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \ -// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -test-lower-to-llvm -o %t +// DEFINE: -test-lower-to-arm-sme -test-lower-to-llvm -o %t // DEFINE: %{run} = %mcr_aarch64_cmd %t \ // DEFINE: -march=aarch64 -mattr=+sve,+sme-f64f64 \ // DEFINE: -e %{entry_point} -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-i8i8i32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-i8i8i32.mlir index 1770e57..9a353ec 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-i8i8i32.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-i8i8i32.mlir @@ -1,11 +1,5 @@ // DEFINE: %{entry} = main -// DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \ -// DEFINE: -arm-sme-outer-product-fusion \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ -// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -test-lower-to-llvm +// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm // DEFINE: %{run} = %mcr_aarch64_cmd \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry} -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir index 6e028d5..52f5688 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir @@ -1,9 +1,5 @@ // DEFINE: %{entry_point} = entry -// DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -test-lower-to-llvm +// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm // DEFINE: %{run} = %mcr_aarch64_cmd \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry_point} -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir index c0c1f55..710cc66 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir @@ -1,10 +1,5 @@ // DEFINE: %{entry_point} = entry -// DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \ -// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za only-if-required-by-ops" \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -test-lower-to-llvm +// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm // DEFINE: %{run} = %mcr_aarch64_cmd \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry_point} -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir index eee3c56..88bc0d0 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir @@ -1,9 +1,5 @@ // DEFINE: %{entry_point} = entry -// DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \ -// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -test-lower-to-llvm +// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm // DEFINE: %{run} = %mcr_aarch64_cmd \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry_point} -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir index 223bc8c..e149174 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir @@ -1,8 +1,4 @@ -// RUN: mlir-opt %s -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \ -// RUN: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \ -// RUN: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// RUN: -convert-arm-sme-to-llvm -cse -canonicalize \ -// RUN: -test-lower-to-llvm | \ +// RUN: mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm | \ // RUN: %mcr_aarch64_cmd \ // RUN: -march=aarch64 -mattr=+sve,+sme \ // RUN: -e entry -entry-point-result=i32 \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir index 2f151e2..b29790db 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir @@ -1,9 +1,5 @@ // DEFINE: %{entry_point} = za0_d_f64 -// DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \ -// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -convert-arm-sme-to-llvm -cse -canonicalize \ -// DEFINE: -test-lower-to-llvm +// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm // DEFINE: %{run} = %mcr_aarch64_cmd \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry_point} -entry-point-result=i32 \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir index f28bf19..c8c401b 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir @@ -1,8 +1,5 @@ // DEFINE: %{entry_point} = entry -// DEFINE: %{compile} = mlir-opt %s -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \ -// DEFINE: -convert-vector-to-arm-sme -convert-arith-to-arm-sme \ -// DEFINE: -convert-arm-sme-to-scf -allocate-arm-sme-tiles \ -// DEFINE: -convert-arm-sme-to-llvm -test-lower-to-llvm +// DEFINE: %{compile} = mlir-opt %s -test-lower-to-arm-sme -test-lower-to-llvm // DEFINE: %{run} = %mcr_aarch64_cmd \ // DEFINE: -march=aarch64 -mattr=+sve,+sme \ // DEFINE: -e %{entry_point} -entry-point-result=i32 \ diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-scalable-interleave.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-scalable-interleave.mlir new file mode 100644 index 0000000..07989bd --- /dev/null +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/test-scalable-interleave.mlir @@ -0,0 +1,25 @@ +// RUN: mlir-opt %s -test-lower-to-llvm | \ +// RUN: %mcr_aarch64_cmd -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_c_runner_utils,%mlir_arm_runner_utils \ +// RUN: -march=aarch64 -mattr=+sve | \ +// RUN: FileCheck %s + +func.func @entry() { + %f1 = arith.constant 1.0 : f32 + %f2 = arith.constant 2.0 : f32 + %v1 = vector.splat %f1 : vector<[4]xf32> + %v2 = vector.splat %f2 : vector<[4]xf32> + vector.print %v1 : vector<[4]xf32> + vector.print %v2 : vector<[4]xf32> + // + // Test vectors: + // + // CHECK: ( 1, 1, 1, 1 + // CHECK: ( 2, 2, 2, 2 + + %v3 = vector.interleave %v1, %v2 : vector<[4]xf32> + vector.print %v3 : vector<[8]xf32> + // CHECK: ( 1, 2, 1, 2, 1, 2, 1, 2 + + return +} diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-interleave.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-interleave.mlir new file mode 100644 index 0000000..0bc78af --- /dev/null +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-interleave.mlir @@ -0,0 +1,24 @@ +// RUN: mlir-opt %s -test-lower-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s + +func.func @entry() { + %f1 = arith.constant 1.0 : f32 + %f2 = arith.constant 2.0 : f32 + %v1 = vector.splat %f1 : vector<2x4xf32> + %v2 = vector.splat %f2 : vector<2x4xf32> + vector.print %v1 : vector<2x4xf32> + vector.print %v2 : vector<2x4xf32> + // + // Test vectors: + // + // CHECK: ( ( 1, 1, 1, 1 ), ( 1, 1, 1, 1 ) ) + // CHECK: ( ( 2, 2, 2, 2 ), ( 2, 2, 2, 2 ) ) + + %v3 = vector.interleave %v1, %v2 : vector<2x4xf32> + vector.print %v3 : vector<2x8xf32> + // CHECK: ( ( 1, 2, 1, 2, 1, 2, 1, 2 ), ( 1, 2, 1, 2, 1, 2, 1, 2 ) ) + + return +} diff --git a/mlir/test/Target/LLVMIR/Import/import-failure.ll b/mlir/test/Target/LLVMIR/Import/import-failure.ll index 0962134..9a4e939 100644 --- a/mlir/test/Target/LLVMIR/Import/import-failure.ll +++ b/mlir/test/Target/LLVMIR/Import/import-failure.ll @@ -59,13 +59,15 @@ define void @unhandled_intrinsic() gc "example" { ; // ----- +; Check that debug intrinsics with an unsupported argument are dropped. + declare void @llvm.dbg.value(metadata, metadata, metadata) ; CHECK: import-failure.ll -; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata !DIArgList(i64 %arg1, i64 undef), metadata !3, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value)), !dbg !5 +; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata !DIArgList(i64 %{{.*}}, i64 undef), metadata !3, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value)) ; CHECK: import-failure.ll -; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata !6, metadata !3, metadata !DIExpression()), !dbg !5 -define void @dropped_instruction(i64 %arg1) { +; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata !6, metadata !3, metadata !DIExpression()) +define void @unsupported_argument(i64 %arg1) { call void @llvm.dbg.value(metadata !DIArgList(i64 %arg1, i64 undef), metadata !3, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 1, DW_OP_mul, DW_OP_plus, DW_OP_stack_value)), !dbg !5 call void @llvm.dbg.value(metadata !6, metadata !3, metadata !DIExpression()), !dbg !5 ret void @@ -83,6 +85,38 @@ define void @dropped_instruction(i64 %arg1) { ; // ----- +; Check that debug intrinsics that depend on cyclic metadata are dropped. + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +; CHECK: import-failure.ll +; CHECK-SAME: warning: dropped instruction: call void @llvm.dbg.label(metadata !{{.*}}) +; CHECK: import-failure.ll +; CHECK-SAME: warning: dropped intrinsic: call void @llvm.dbg.value(metadata i64 %{{.*}}, metadata !3, metadata !DIExpression()) +define void @cylic_metadata(i64 %arg1) { + call void @llvm.dbg.value(metadata i64 %arg1, metadata !10, metadata !DIExpression()), !dbg !14 + call void @llvm.dbg.label(metadata !13), !dbg !14 + ret void +} + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2) +!2 = !DIFile(filename: "import-failure.ll", directory: "/") +!3 = !DICompositeType(tag: DW_TAG_array_type, size: 42, baseType: !4) +!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !3) +!5 = distinct !DISubprogram(name: "class_method", scope: !2, file: !2, type: !6, spFlags: DISPFlagDefinition, unit: !1) +!6 = !DISubroutineType(types: !7) +!7 = !{!3} +!10 = !DILocalVariable(scope: !5, name: "arg1", file: !2, line: 1, arg: 1, align: 64); +!11 = !DILexicalBlock(scope: !5) +!12 = !DILexicalBlockFile(scope: !11, discriminator: 0) +!13 = !DILabel(scope: !12, name: "label", file: !2, line: 42) +!14 = !DILocation(line: 1, column: 2, scope: !5) + +; // ----- + ; global_dtors with non-null data fields cannot be represented in MLIR. ; CHECK: <unknown> ; CHECK-SAME: error: unhandled global variable: @llvm.global_dtors diff --git a/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt b/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt new file mode 100644 index 0000000..e942c7b --- /dev/null +++ b/mlir/test/lib/Dialect/ArmSME/CMakeLists.txt @@ -0,0 +1,18 @@ +# Exclude tests from libMLIR.so +add_mlir_library(MLIRArmSMETestPasses + TestLowerToArmSME.cpp + + EXCLUDE_FROM_LIBMLIR + + LINK_LIBS PUBLIC + MLIRArithToArmSME + MLIRArmSMEToLLVM + MLIRArmSMEToSCF + MLIRArmSMETransforms + MLIRArmSVETransforms + MLIRIR + MLIRPass + MLIRTransforms + MLIRVectorToArmSME + MLIRVectorToSCF + ) diff --git a/mlir/test/lib/Dialect/ArmSME/TestLowerToArmSME.cpp b/mlir/test/lib/Dialect/ArmSME/TestLowerToArmSME.cpp new file mode 100644 index 0000000..48d4a58 --- /dev/null +++ b/mlir/test/lib/Dialect/ArmSME/TestLowerToArmSME.cpp @@ -0,0 +1,99 @@ +//===- TestLowerToArmSME.cpp - Test lowering to ArmSME as a sink pass -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass for testing the lowering to ArmSME as a +// generally usable sink pass. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/ArithToArmSME/ArithToArmSME.h" +#include "mlir/Conversion/ArmSMEToLLVM/ArmSMEToLLVM.h" +#include "mlir/Conversion/ArmSMEToSCF/ArmSMEToSCF.h" +#include "mlir/Conversion/VectorToArmSME/VectorToArmSME.h" +#include "mlir/Conversion/VectorToSCF/VectorToSCF.h" +#include "mlir/Dialect/ArmSME/Transforms/Passes.h" +#include "mlir/Dialect/ArmSVE/Transforms/Passes.h" +#include "mlir/IR/DialectRegistry.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Pass/PassOptions.h" +#include "mlir/Transforms/Passes.h" + +using namespace mlir; + +namespace { +struct TestLowerToArmSMEOptions + : public PassPipelineOptions<TestLowerToArmSMEOptions> { + PassOptions::Option<bool> fuseOuterProducts{ + *this, "fuse-outer-products", + llvm::cl::desc("Fuse outer product operations via " + "'-arm-sme-outer-product-fusion' pass"), + llvm::cl::init(true)}; +}; + +void buildTestLowerToArmSME(OpPassManager &pm, + const TestLowerToArmSMEOptions &options) { + // Legalize vector operations so they can be converted to ArmSME. + pm.addPass(arm_sme::createVectorLegalizationPass()); + + // Sprinkle some cleanups. + pm.addPass(createCanonicalizerPass()); + pm.addPass(createCSEPass()); + + // Passes that convert operations on vectors to ArmSME operations. + + // Convert Arith to ArmSME. + pm.addPass(createArithToArmSMEConversionPass()); + // Convert Vector to ArmSME. + pm.addPass(createConvertVectorToArmSMEPass()); + + // Fuse outer products. + if (options.fuseOuterProducts) + pm.addPass(arm_sme::createOuterProductFusionPass()); + + // Convert operations on high-level vectors to loops. + + // Convert ArmSME to SCF. + pm.addPass(createConvertArmSMEToSCFPass()); + + // Convert Vector to SCF (with full unroll enabled). + pm.addPass(createConvertVectorToSCFPass( + VectorTransferToSCFOptions().enableFullUnroll())); + + // Allocate tiles for ArmSME operations. + // + // Later passes may create further ArmSME ops that implement the + // ArmSMETileOpInterface, but tiles are allocated for root operations, + // all of which should now exist. + pm.addPass(arm_sme::createTileAllocationPass()); + + // Enable streaming-mode and ZA. + pm.addPass(arm_sme::createEnableArmStreamingPass( + arm_sme::ArmStreamingMode::StreamingLocally, arm_sme::ArmZaMode::NewZA, + /*onlyIfRequiredByOps=*/true)); + + // Convert ArmSME to LLVM. + pm.addPass(createConvertArmSMEToLLVMPass()); + + // Sprinkle some cleanups. + pm.addPass(createCanonicalizerPass()); + pm.addPass(createCSEPass()); +} +} // namespace + +namespace mlir { +namespace test { +void registerTestLowerToArmSME() { + PassPipelineRegistration<TestLowerToArmSMEOptions>( + "test-lower-to-arm-sme", + "An example pipeline to lower operations on vectors (arith, vector) to " + "LLVM via ArmSME.", + buildTestLowerToArmSME); +} +} // namespace test +} // namespace mlir diff --git a/mlir/test/lib/Dialect/CMakeLists.txt b/mlir/test/lib/Dialect/CMakeLists.txt index 30a17c2..e20cd44 100644 --- a/mlir/test/lib/Dialect/CMakeLists.txt +++ b/mlir/test/lib/Dialect/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(Affine) add_subdirectory(Arith) +add_subdirectory(ArmSME) add_subdirectory(Bufferization) add_subdirectory(ControlFlow) add_subdirectory(DLTI) diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 108cfe8..bde4255 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -1152,8 +1152,10 @@ struct TestLegalizePatternDriver // Handle a partial conversion. if (mode == ConversionMode::Partial) { DenseSet<Operation *> unlegalizedOps; - if (failed(applyPartialConversion( - getOperation(), target, std::move(patterns), &unlegalizedOps))) { + ConversionConfig config; + config.unlegalizedOps = &unlegalizedOps; + if (failed(applyPartialConversion(getOperation(), target, + std::move(patterns), config))) { getOperation()->emitRemark() << "applyPartialConversion failed"; } // Emit remarks for each legalizable operation. @@ -1181,8 +1183,10 @@ struct TestLegalizePatternDriver // Analyze the convertible operations. DenseSet<Operation *> legalizedOps; + ConversionConfig config; + config.legalizableOps = &legalizedOps; if (failed(applyAnalysisConversion(getOperation(), target, - std::move(patterns), legalizedOps))) + std::move(patterns), config))) return signalPassFailure(); // Emit remarks for each legalizable operation. @@ -1806,8 +1810,10 @@ struct TestMergeBlocksPatternDriver }); DenseSet<Operation *> unlegalizedOps; + ConversionConfig config; + config.unlegalizedOps = &unlegalizedOps; (void)applyPartialConversion(getOperation(), target, std::move(patterns), - &unlegalizedOps); + config); for (auto *op : unlegalizedOps) op->emitRemark() << "op '" << op->getName() << "' is not legalizable"; } |