Diffstat (limited to 'mlir/test')
-rw-r--r--  mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir | 67
-rw-r--r--  mlir/test/Analysis/test-alias-analysis.mlir | 16
-rw-r--r--  mlir/test/Conversion/MathToXeVM/lit.local.cfg | 7
-rw-r--r--  mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir | 155
-rw-r--r--  mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir | 119
-rw-r--r--  mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir | 86
-rw-r--r--  mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir | 29
-rw-r--r--  mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir | 6
-rw-r--r--  mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir | 32
-rw-r--r--  mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir | 38
-rw-r--r--  mlir/test/Dialect/LLVMIR/bytecode.mlir | 35
-rw-r--r--  mlir/test/Dialect/LLVMIR/debuginfo.mlir | 1
-rw-r--r--  mlir/test/Dialect/LLVMIR/roundtrip.mlir | 6
-rw-r--r--  mlir/test/Dialect/Linalg/match-ops-interpreter.mlir | 14
-rw-r--r--  mlir/test/Dialect/Linalg/one-shot-bufferize.mlir | 16
-rw-r--r--  mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir | 102
-rw-r--r--  mlir/test/Dialect/OpenACC/recipe-populate-private.mlir | 82
-rw-r--r--  mlir/test/Dialect/SCF/one-shot-bufferize.mlir | 12
-rw-r--r--  mlir/test/Dialect/Tensor/one-shot-bufferize.mlir | 12
-rw-r--r--  mlir/test/Dialect/Vector/canonicalize/vector-step.mlir | 311
-rw-r--r--  mlir/test/Dialect/Vector/vector-unroll-options.mlir | 68
-rw-r--r--  mlir/test/Dialect/Vector/vector-warp-distribute.mlir | 19
-rw-r--r--  mlir/test/Dialect/XeGPU/xegpu-blocking.mlir | 98
-rw-r--r--  mlir/test/Integration/GPU/SPIRV/simple_add.mlir | 11
-rw-r--r--  mlir/test/Pass/remark-final.mlir | 17
-rw-r--r--  mlir/test/Target/LLVMIR/Import/debug-info.ll | 3
-rw-r--r--  mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir | 12
-rw-r--r--  mlir/test/Target/LLVMIR/nvvmir-invalid.mlir | 18
-rw-r--r--  mlir/test/Target/LLVMIR/nvvmir.mlir | 4
-rw-r--r--  mlir/test/lib/Analysis/CMakeLists.txt | 1
-rw-r--r--  mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp | 86
-rw-r--r--  mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp | 26
-rw-r--r--  mlir/test/lib/Dialect/OpenACC/CMakeLists.txt | 1
-rw-r--r--  mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp | 6
-rw-r--r--  mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp | 8
-rw-r--r--  mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp | 110
-rw-r--r--  mlir/test/lib/Dialect/Test/TestAttrDefs.td | 17
-rw-r--r--  mlir/test/lib/Dialect/Test/TestAttributes.cpp | 18
-rw-r--r--  mlir/test/lib/Dialect/Test/TestAttributes.h | 1
-rw-r--r--  mlir/test/lib/Dialect/Test/TestDialect.h | 1
-rw-r--r--  mlir/test/lib/Dialect/Test/TestDialect.td | 5
-rw-r--r--  mlir/test/lib/Dialect/Test/TestOpDefs.cpp | 44
-rw-r--r--  mlir/test/lib/Dialect/Test/TestOps.td | 15
-rw-r--r--  mlir/test/lib/Pass/TestRemarksPass.cpp | 7
-rw-r--r--  mlir/test/mlir-tblgen/cpp-class-comments.td | 10
-rw-r--r--  mlir/test/mlir-tblgen/dialect.td | 5
46 files changed, 1673 insertions, 84 deletions
diff --git a/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir
new file mode 100644
index 0000000..808c1c2
--- /dev/null
+++ b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir
@@ -0,0 +1,67 @@
+// RUN: mlir-opt -test-strided-metadata-range-analysis %s 2>&1 | FileCheck %s
+
+func.func @memref_subview(%arg0: memref<8x16x4xf32, strided<[64, 4, 1]>>, %arg1: memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>>, %arg2: memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>>, %arg3: index, %arg4: index, %arg5: index) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c2 = arith.constant 2 : index
+ %0 = test.with_bounds {smax = 13 : index, smin = 11 : index, umax = 13 : index, umin = 11 : index} : index
+ %1 = test.with_bounds {smax = 7 : index, smin = 5 : index, umax = 7 : index, umin = 5 : index} : index
+
+ // Test subview with unknown sizes, and constant offsets and strides.
+ // CHECK: Op: %[[SV0:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [1, 1] signed : [1, 1]}]
+ // CHECK-SAME: sizes = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: strides = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [4, 4] signed : [4, 4]}, {unsigned : [1, 1] signed : [1, 1]}]
+ %subview = memref.subview %arg0[%c0, %c0, %c1] [%arg3, %arg4, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[64, 4, 1]>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+
+ // Test a subview of a subview, with bounded dynamic offsets.
+ // CHECK: Op: %[[SV1:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [346, 484] signed : [346, 484]}]
+ // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}]
+ // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}]
+ %subview_0 = memref.subview %subview[%1, %1, %1] [%c2, %c2, %c2] [%0, %0, %0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+
+ // Test a subview of a subview, with constant operands.
+ // CHECK: Op: %[[SV2:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [368, 510] signed : [368, 510]}]
+ // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}]
+ // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}]
+ %subview_1 = memref.subview %subview_0[%c0, %c0, %c2] [%c2, %c2, %c2] [%c1, %c1, %c1] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+
+ // Test a rank-reducing subview.
+ // CHECK: Op: %[[SV3:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: sizes = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [16, 16] signed : [16, 16]}]
+ // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ %subview_2 = memref.subview %arg1[%arg4, %arg4, %arg4, %arg4, %arg4] [1, 64, 1, 16, 1] [%arg5, %arg5, %arg5, %arg5, %arg5] : memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>> to memref<64x16xf32, strided<[?, ?], offset: ?>>
+
+ // Test a subview of a rank-reducing subview.
+ // CHECK: Op: %[[SV4:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: sizes = [{unsigned : [5, 7] signed : [5, 7]}]
+ // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ %subview_3 = memref.subview %subview_2[%c0, %0] [1, %1] [%c1, %c2] : memref<64x16xf32, strided<[?, ?], offset: ?>> to memref<?xf32, strided<[?], offset: ?>>
+
+ // Test a subview with mixed bounded and unbounded dynamic sizes.
+ // CHECK: Op: %[[SV5:.*]] = memref.subview
+ // CHECK-NEXT: result[0]: strided_metadata<
+ // CHECK-SAME: offset = [{unsigned : [32, 32] signed : [32, 32]}]
+ // CHECK-SAME: sizes = [{unsigned : [11, 13] signed : [11, 13]}, {unsigned : [5, 7] signed : [5, 7]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}]
+ // CHECK-SAME: strides = [{unsigned : [1, 1] signed : [1, 1]}, {unsigned : [64, 64] signed : [64, 64]}, {unsigned : [8, 8] signed : [8, 8]}]
+ %subview_4 = memref.subview %arg2[%c0, %c0, %c2] [%0, %1, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>
+ return
+}
+
+// CHECK: func.func @memref_subview
+// CHECK: %[[A0:.*]]: memref<8x16x4xf32, strided<[64, 4, 1]>>
+// CHECK: %[[SV0]] = memref.subview %[[A0]]
+// CHECK-NEXT: %[[SV1]] = memref.subview
+// CHECK-NEXT: %[[SV2]] = memref.subview
+// CHECK-NEXT: %[[SV3]] = memref.subview
+// CHECK-NEXT: %[[SV4]] = memref.subview
+// CHECK-NEXT: %[[SV5]] = memref.subview
diff --git a/mlir/test/Analysis/test-alias-analysis.mlir b/mlir/test/Analysis/test-alias-analysis.mlir
index 8cbee61..d71adee 100644
--- a/mlir/test/Analysis/test-alias-analysis.mlir
+++ b/mlir/test/Analysis/test-alias-analysis.mlir
@@ -256,3 +256,19 @@ func.func @constants(%arg: memref<2xf32>) attributes {test.ptr = "func"} {
return
}
+
+// -----
+
+// CHECK-LABEL: Testing : "distinct_objects"
+// CHECK-DAG: func.region0#0 <-> func.region0#1: MayAlias
+
+// CHECK-DAG: distinct#0 <-> distinct#1: NoAlias
+// CHECK-DAG: distinct#0 <-> func.region0#0: MustAlias
+// CHECK-DAG: distinct#1 <-> func.region0#0: MayAlias
+// CHECK-DAG: distinct#0 <-> func.region0#1: MayAlias
+// CHECK-DAG: distinct#1 <-> func.region0#1: MustAlias
+
+func.func @distinct_objects(%arg: memref<?xf32>, %arg1: memref<?xf32>) attributes {test.ptr = "func"} {
+ %0, %1 = memref.distinct_objects %arg, %arg1 {test.ptr = "distinct"} : memref<?xf32>, memref<?xf32>
+ return
+}
diff --git a/mlir/test/Conversion/MathToXeVM/lit.local.cfg b/mlir/test/Conversion/MathToXeVM/lit.local.cfg
new file mode 100644
index 0000000..cc1ce35
--- /dev/null
+++ b/mlir/test/Conversion/MathToXeVM/lit.local.cfg
@@ -0,0 +1,7 @@
+spirv_backend_tests = [
+ 'native-spirv-builtins.mlir',
+]
+
+# Exclude SPIRV backend tests if SPIRV target is disabled:
+if(not config.run_xevm_tests):
+ config.excludes.update(spirv_backend_tests)
diff --git a/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir b/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir
new file mode 100644
index 0000000..d76627b
--- /dev/null
+++ b/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir
@@ -0,0 +1,155 @@
+// RUN: mlir-opt %s -convert-math-to-xevm \
+// RUN: | FileCheck %s -check-prefixes='CHECK,CHECK-ARITH'
+// RUN: mlir-opt %s -convert-math-to-xevm='convert-arith=false' \
+// RUN: | FileCheck %s -check-prefixes='CHECK,CHECK-NO-ARITH'
+
+module @test_module {
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64
+ //
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv2_d(vector<2xf64>) -> vector<2xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv3_d(vector<3xf64>) -> vector<3xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv4_d(vector<4xf64>) -> vector<4xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv8_d(vector<8xf64>) -> vector<8xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv16_d(vector<16xf64>) -> vector<16xf64>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv16_f(vector<16xf32>) -> vector<16xf32>
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv4_Dh(vector<4xf16>) -> vector<4xf16>
+ //
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16
+ // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16
+ // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32
+ // CHECK-DAG: llvm.func @_Z24__spirv_ocl_native_log10d(f64) -> f64
+ // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_powrDhDh(f16, f16) -> f16
+ // CHECK-DAG: llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16
+ // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32
+ // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64
+ // CHECK-ARITH-DAG: llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32
+
+ // CHECK-LABEL: func @math_ops
+ func.func @math_ops() {
+
+ %c1_f16 = arith.constant 1. : f16
+ %c1_f32 = arith.constant 1. : f32
+ %c1_f64 = arith.constant 1. : f64
+
+ // CHECK: math.exp
+ %exp_normal_f16 = math.exp %c1_f16 : f16
+ // CHECK: math.exp
+ %exp_normal_f32 = math.exp %c1_f32 : f32
+ // CHECK: math.exp
+ %exp_normal_f64 = math.exp %c1_f64 : f64
+
+ // Check float operations are converted properly:
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f16) -> f16
+ %exp_fast_f16 = math.exp %c1_f16 fastmath<fast> : f16
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f32) -> f32
+ %exp_fast_f32 = math.exp %c1_f32 fastmath<fast> : f32
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expd(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f64) -> f64
+ %exp_fast_f64 = math.exp %c1_f64 fastmath<fast> : f64
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ %exp_afn_f16 = math.exp %c1_f16 fastmath<afn> : f16
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ %exp_afn_f32 = math.exp %c1_f32 fastmath<afn> : f32
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expd(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ %exp_afn_f64 = math.exp %c1_f64 fastmath<afn> : f64
+
+ // CHECK: math.exp
+ %exp_none_f16 = math.exp %c1_f16 fastmath<none> : f16
+ // CHECK: math.exp
+ %exp_none_f32 = math.exp %c1_f32 fastmath<none> : f32
+ // CHECK: math.exp
+ %exp_none_f64 = math.exp %c1_f64 fastmath<none> : f64
+
+ // Check vector operations:
+
+ %v2_c1_f64 = arith.constant dense<1.> : vector<2xf64>
+ %v3_c1_f64 = arith.constant dense<1.> : vector<3xf64>
+ %v4_c1_f64 = arith.constant dense<1.> : vector<4xf64>
+ %v8_c1_f64 = arith.constant dense<1.> : vector<8xf64>
+ %v16_c1_f64 = arith.constant dense<1.> : vector<16xf64>
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv2_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<2xf64>) -> vector<2xf64>
+ %exp_v2_f64 = math.exp %v2_c1_f64 fastmath<afn> : vector<2xf64>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv3_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<3xf64>) -> vector<3xf64>
+ %exp_v3_f64 = math.exp %v3_c1_f64 fastmath<afn> : vector<3xf64>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv4_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<4xf64>) -> vector<4xf64>
+ %exp_v4_f64 = math.exp %v4_c1_f64 fastmath<afn> : vector<4xf64>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv8_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64>
+ %exp_v8_f64 = math.exp %v8_c1_f64 fastmath<afn> : vector<8xf64>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv16_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf64>) -> vector<16xf64>
+ %exp_v16_f64 = math.exp %v16_c1_f64 fastmath<afn> : vector<16xf64>
+
+ %v16_c1_f32 = arith.constant dense<1.> : vector<16xf32>
+ %v4_c1_f16 = arith.constant dense<1.> : vector<4xf16>
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv16_f(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (vector<16xf32>) -> vector<16xf32>
+ %exp_v16_f32 = math.exp %v16_c1_f32 fastmath<fast> : vector<16xf32>
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv4_Dh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (vector<4xf16>) -> vector<4xf16>
+ %exp_v4_f16 = math.exp %v4_c1_f16 fastmath<fast> : vector<4xf16>
+
+ // Check unsupported vector sizes are not converted:
+
+ %v5_c1_f64 = arith.constant dense<1.> : vector<5xf64>
+ %v32_c1_f64 = arith.constant dense<1.> : vector<32xf64>
+
+ // CHECK: math.exp
+ %exp_v5_f64 = math.exp %v5_c1_f64 fastmath<afn> : vector<5xf64>
+ // CHECK: math.exp
+ %exp_v32_f64 = math.exp %v32_c1_f64 fastmath<afn> : vector<32xf64>
+
+ // Check fastmath flags propagate properly:
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f16) -> f16
+ %exp_fastmath_all_f16 = math.exp %c1_f16 fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn> : f16
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<nnan, ninf, nsz, arcp, contract, afn>} : (f32) -> f32
+ %exp_fastmath_most_f32 = math.exp %c1_f32 fastmath<nnan,ninf,nsz,arcp,contract,afn> : f32
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<nnan, afn, reassoc>} : (f32) -> f32
+ %exp_afn_reassoc_nnan_f32 = math.exp %c1_f32 fastmath<afn,reassoc,nnan> : f32
+
+ // Check all other math operations:
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_cosDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ %cos_afn_f16 = math.cos %c1_f16 fastmath<afn> : f16
+
+ // CHECK: llvm.call @_Z23__spirv_ocl_native_exp2f(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ %exp2_afn_f32 = math.exp2 %c1_f32 fastmath<afn> : f32
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_logDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ %log_afn_f16 = math.log %c1_f16 fastmath<afn> : f16
+
+ // CHECK: llvm.call @_Z23__spirv_ocl_native_log2f(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ %log2_afn_f32 = math.log2 %c1_f32 fastmath<afn> : f32
+
+ // CHECK: llvm.call @_Z24__spirv_ocl_native_log10d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ %log10_afn_f64 = math.log10 %c1_f64 fastmath<afn> : f64
+
+ // CHECK: llvm.call @_Z23__spirv_ocl_native_powrDhDh(%{{.*}}, %{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16, f16) -> f16
+ %powr_afn_f16 = math.powf %c1_f16, %c1_f16 fastmath<afn> : f16
+
+ // CHECK: llvm.call @_Z24__spirv_ocl_native_rsqrtd(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ %rsqrt_afn_f64 = math.rsqrt %c1_f64 fastmath<afn> : f64
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_sinDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ %sin_afn_f16 = math.sin %c1_f16 fastmath<afn> : f16
+
+ // CHECK: llvm.call @_Z23__spirv_ocl_native_sqrtf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ %sqrt_afn_f32 = math.sqrt %c1_f32 fastmath<afn> : f32
+
+ // CHECK: llvm.call @_Z22__spirv_ocl_native_tand(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ %tan_afn_f64 = math.tan %c1_f64 fastmath<afn> : f64
+
+ %c6_9_f32 = arith.constant 6.9 : f32
+ %c7_f32 = arith.constant 7. : f32
+
+ // CHECK-ARITH: llvm.call @_Z25__spirv_ocl_native_divideff(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32
+ // CHECK-NO-ARITH: arith.divf
+ %divf_afn_f32 = arith.divf %c6_9_f32, %c7_f32 fastmath<afn> : f32
+
+ return
+ }
+}
diff --git a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
new file mode 100644
index 0000000..82426c4
--- /dev/null
+++ b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir
@@ -0,0 +1,119 @@
+// RUN: mlir-opt %s -gpu-module-to-binary="format=isa" \
+// RUN: -debug-only=serialize-to-isa 2> %t
+// RUN: FileCheck --input-file=%t %s
+// REQUIRES: asserts
+//
+// The MathToXeVM pass generates OpenCL intrinsic function calls when converting
+// Math ops with the `fastmath` attr to native function calls. It is assumed that
+// the SPIRV backend correctly converts these intrinsic calls to OpenCL
+// ExtInst instructions in SPIRV (see llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp).
+//
+// To ensure this assumption holds, this test verifies that the SPIRV backend
+// behaves as expected.
+
+module @test_ocl_intrinsics attributes {gpu.container_module} {
+ gpu.module @kernel [#xevm.target] {
+ llvm.func spir_kernelcc @native_fcns() attributes {gpu.kernel} {
+ // CHECK-DAG: %[[F16T:.+]] = OpTypeFloat 16
+ // CHECK-DAG: %[[ZERO_F16:.+]] = OpConstantNull %[[F16T]]
+ %c0_f16 = llvm.mlir.constant(0. : f16) : f16
+ // CHECK-DAG: %[[F32T:.+]] = OpTypeFloat 32
+ // CHECK-DAG: %[[ZERO_F32:.+]] = OpConstantNull %[[F32T]]
+ %c0_f32 = llvm.mlir.constant(0. : f32) : f32
+ // CHECK-DAG: %[[F64T:.+]] = OpTypeFloat 64
+ // CHECK-DAG: %[[ZERO_F64:.+]] = OpConstantNull %[[F64T]]
+ %c0_f64 = llvm.mlir.constant(0. : f64) : f64
+
+ // CHECK-DAG: %[[V2F64T:.+]] = OpTypeVector %[[F64T]] 2
+ // CHECK-DAG: %[[V2_ZERO_F64:.+]] = OpConstantNull %[[V2F64T]]
+ %v2_c0_f64 = llvm.mlir.constant(dense<0.> : vector<2xf64>) : vector<2xf64>
+ // CHECK-DAG: %[[V3F32T:.+]] = OpTypeVector %[[F32T]] 3
+ // CHECK-DAG: %[[V3_ZERO_F32:.+]] = OpConstantNull %[[V3F32T]]
+ %v3_c0_f32 = llvm.mlir.constant(dense<0.> : vector<3xf32>) : vector<3xf32>
+ // CHECK-DAG: %[[V4F64T:.+]] = OpTypeVector %[[F64T]] 4
+ // CHECK-DAG: %[[V4_ZERO_F64:.+]] = OpConstantNull %[[V4F64T]]
+ %v4_c0_f64 = llvm.mlir.constant(dense<0.> : vector<4xf64>) : vector<4xf64>
+ // CHECK-DAG: %[[V8F64T:.+]] = OpTypeVector %[[F64T]] 8
+ // CHECK-DAG: %[[V8_ZERO_F64:.+]] = OpConstantNull %[[V8F64T]]
+ %v8_c0_f64 = llvm.mlir.constant(dense<0.> : vector<8xf64>) : vector<8xf64>
+ // CHECK-DAG: %[[V16F16T:.+]] = OpTypeVector %[[F16T]] 16
+ // CHECK-DAG: %[[V16_ZERO_F16:.+]] = OpConstantNull %[[V16F16T]]
+ %v16_c0_f16 = llvm.mlir.constant(dense<0.> : vector<16xf16>) : vector<16xf16>
+
+ // CHECK: OpExtInst %[[F16T]] %{{.+}} native_exp %[[ZERO_F16]]
+ %exp_f16 = llvm.call @_Z22__spirv_ocl_native_expDh(%c0_f16) : (f16) -> f16
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp %[[ZERO_F32]]
+ %exp_f32 = llvm.call @_Z22__spirv_ocl_native_expf(%c0_f32) : (f32) -> f32
+ // CHECK: OpExtInst %[[F64T]] %{{.+}} native_exp %[[ZERO_F64]]
+ %exp_f64 = llvm.call @_Z22__spirv_ocl_native_expd(%c0_f64) : (f64) -> f64
+
+ // CHECK: OpExtInst %[[V2F64T]] %{{.+}} native_exp %[[V2_ZERO_F64]]
+ %exp_v2_f64 = llvm.call @_Z22__spirv_ocl_native_expDv2_f64(%v2_c0_f64) : (vector<2xf64>) -> vector<2xf64>
+ // CHECK: OpExtInst %[[V3F32T]] %{{.+}} native_exp %[[V3_ZERO_F32]]
+ %exp_v3_f32 = llvm.call @_Z22__spirv_ocl_native_expDv3_f32(%v3_c0_f32) : (vector<3xf32>) -> vector<3xf32>
+ // CHECK: OpExtInst %[[V4F64T]] %{{.+}} native_exp %[[V4_ZERO_F64]]
+ %exp_v4_f64 = llvm.call @_Z22__spirv_ocl_native_expDv4_f64(%v4_c0_f64) : (vector<4xf64>) -> vector<4xf64>
+ // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_exp %[[V8_ZERO_F64]]
+ %exp_v8_f64 = llvm.call @_Z22__spirv_ocl_native_expDv8_f64(%v8_c0_f64) : (vector<8xf64>) -> vector<8xf64>
+ // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_exp %[[V16_ZERO_F16]]
+ %exp_v16_f16 = llvm.call @_Z22__spirv_ocl_native_expDv16_f16(%v16_c0_f16) : (vector<16xf16>) -> vector<16xf16>
+
+ // The SPIRV backend does not currently handle fastmath flags: the backend
+ // would need to generate OpDecorate instructions to decorate math ops
+ // with FPFastMathMode/FPFastMathModeINTEL decorations.
+ //
+ // FIXME: When support for fastmath flags in the SPIRV backend is added,
+ // add tests here to ensure fastmath flags are converted to the correct
+ // OpDecorate calls.
+ //
+ // See:
+ // - https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_math_extended_instructions
+ // - https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpDecorate
+
+ // CHECK: OpExtInst %[[F16T]] %{{.+}} native_cos %[[ZERO_F16]]
+ %cos_afn_f16 = llvm.call @_Z22__spirv_ocl_native_cosDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp2 %[[ZERO_F32]]
+ %exp2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_exp2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ // CHECK: OpExtInst %[[F16T]] %{{.+}} native_log %[[ZERO_F16]]
+ %log_afn_f16 = llvm.call @_Z22__spirv_ocl_native_logDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_log2 %[[ZERO_F32]]
+ %log2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_log2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_log10 %[[V8_ZERO_F64]]
+ %log10_afn_f64 = llvm.call @_Z24__spirv_ocl_native_log10Dv8_d(%v8_c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64>
+ // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_powr %[[V16_ZERO_F16]] %[[V16_ZERO_F16]]
+ %powr_afn_f16 = llvm.call @_Z23__spirv_ocl_native_powrDv16_DhS_(%v16_c0_f16, %v16_c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf16>, vector<16xf16>) -> vector<16xf16>
+ // CHECK: OpExtInst %[[F64T]] %{{.+}} native_rsqrt %[[ZERO_F64]]
+ %rsqrt_afn_f64 = llvm.call @_Z24__spirv_ocl_native_rsqrtd(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ // CHECK: OpExtInst %[[F16T]] %{{.+}} native_sin %[[ZERO_F16]]
+ %sin_afn_f16 = llvm.call @_Z22__spirv_ocl_native_sinDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_sqrt %[[ZERO_F32]]
+ %sqrt_afn_f32 = llvm.call @_Z23__spirv_ocl_native_sqrtf(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32
+ // CHECK: OpExtInst %[[F64T]] %{{.+}} native_tan %[[ZERO_F64]]
+ %tan_afn_f64 = llvm.call @_Z22__spirv_ocl_native_tand(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64
+ // CHECK: OpExtInst %[[F32T]] %{{.+}} native_divide %[[ZERO_F32]] %[[ZERO_F32]]
+ %divide_afn_f32 = llvm.call @_Z25__spirv_ocl_native_divideff(%c0_f32, %c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32
+
+ llvm.return
+ }
+
+ llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16
+ llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32
+ llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64
+ llvm.func @_Z22__spirv_ocl_native_expDv2_f64(vector<2xf64>) -> vector<2xf64>
+ llvm.func @_Z22__spirv_ocl_native_expDv3_f32(vector<3xf32>) -> vector<3xf32>
+ llvm.func @_Z22__spirv_ocl_native_expDv4_f64(vector<4xf64>) -> vector<4xf64>
+ llvm.func @_Z22__spirv_ocl_native_expDv8_f64(vector<8xf64>) -> vector<8xf64>
+ llvm.func @_Z22__spirv_ocl_native_expDv16_f16(vector<16xf16>) -> vector<16xf16>
+ llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16
+ llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32
+ llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16
+ llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32
+ llvm.func @_Z24__spirv_ocl_native_log10Dv8_d(vector<8xf64>) -> vector<8xf64>
+ llvm.func @_Z23__spirv_ocl_native_powrDv16_DhS_(vector<16xf16>, vector<16xf16>) -> vector<16xf16>
+ llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64
+ llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16
+ llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32
+ llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64
+ llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32
+ }
+}
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index a7a73ae..780c25a 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -1538,6 +1538,92 @@ func.func @unsupportedRescaleInexactRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8>
// -----
+// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)>
+// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()>
+// CHECK-LABEL: @rescale_no_const
+// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
+func.func @rescale_no_const(%arg0 : tensor<2xi8>, %multiplier : tensor<1xi32>, %shift : tensor<1xi8>, %input_zp : tensor<1xi8>, %output_zp : tensor<1xi8>) -> (tensor<2xi8>) {
+ // CHECK: [[MULTIPLIER:%.+]] = tensor.collapse_shape %arg1 [] : tensor<1xi32> into tensor<i32>
+ // CHECK: [[SHIFT:%.+]] = tensor.collapse_shape %arg2 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xi8>
+ // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[MULTIPLIER]], [[SHIFT]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<i32>, tensor<i8>, tensor<i8>, tensor<i8>) outs([[INIT]] : tensor<2xi8>) {
+ // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: i8, [[ARG4:%.*]]: i8, [[OUT:%.*]]: i8):
+ // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extsi [[ARG3]] : i8 to i32
+ // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extsi [[ARG4]] : i8 to i32
+ // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32
+ // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32
+ // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32
+ // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32
+ // CHECK: %c-128_i32 = arith.constant -128 : i32
+ // CHECK: %c127_i32 = arith.constant 127 : i32
+ // CHECK: [[MAX:%.+]] = arith.maxsi %c-128_i32, [[TMP3]] : i32
+ // CHECK: [[MIN:%.+]] = arith.minsi %c127_i32, [[MAX]] : i32
+ %0 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = false, input_unsigned = false, output_unsigned = false} : (tensor<2xi8>, tensor<1xi32>, tensor<1xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8>
+ return %0 : tensor<2xi8>
+}
+
+// -----
+
+// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)>
+// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()>
+// CHECK-LABEL: @rescale_no_const_per_channel
+// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
+// CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]]
+// CHECK-SAME: [[ARG2:%[0-9a-zA-Z_]*]]
+func.func @rescale_no_const_per_channel(%arg0 : tensor<2xi8>, %arg1 : tensor<2xi32>, %arg2 : tensor<2xi8>, %input_zp : tensor<1xi8>, %output_zp : tensor<1xi8>) -> (tensor<2xi8>) {
+ // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xi8> into tensor<i8>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xi8>
+ // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[ARG1]], [[ARG2]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<i8>, tensor<i8>) outs([[INIT]] : tensor<2xi8>) {
+ // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: i8, [[ARG4:%.*]]: i8, [[OUT:%.*]]: i8):
+ // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extsi [[ARG3]] : i8 to i32
+ // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extsi [[ARG4]] : i8 to i32
+ // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32
+ // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32
+ // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32
+ // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32
+ // CHECK: %c-128_i32 = arith.constant -128 : i32
+ // CHECK: %c127_i32 = arith.constant 127 : i32
+ // CHECK: [[MAX:%.+]] = arith.maxsi %c-128_i32, [[TMP3]] : i32
+ // CHECK: [[MIN:%.+]] = arith.minsi %c127_i32, [[MAX]] : i32
+ %0 = tosa.rescale %arg0, %arg1, %arg2, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = true, input_unsigned = false, output_unsigned = false} : (tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8>
+ return %0 : tensor<2xi8>
+}
+
+// -----
+
+// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)>
+// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()>
+// CHECK-LABEL: @rescale_no_const_per_channel_input_output_zp_ui8
+// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]]
+// CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]]
+// CHECK-SAME: [[ARG2:%[0-9a-zA-Z_]*]]
+func.func @rescale_no_const_per_channel_input_output_zp_ui8(%arg0 : tensor<2xi8>, %arg1 : tensor<2xi32>, %arg2 : tensor<2xi8>, %input_zp : tensor<1xui8>, %output_zp : tensor<1xui8>) -> (tensor<2xui8>) {
+ // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xui8> into tensor<ui8>
+ // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xui8> into tensor<ui8>
+ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xui8>
+ // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[ARG1]], [[ARG2]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<ui8>, tensor<ui8>) outs([[INIT]] : tensor<2xui8>) {
+ // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: ui8, [[ARG4:%.*]]: ui8, [[OUT:%.*]]: ui8):
+ // CHECK: [[INPUT_ZP_I8:%.+]] = builtin.unrealized_conversion_cast [[ARG3]] : ui8 to i8
+ // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extui [[INPUT_ZP_I8]] : i8 to i32
+ // CHECK: [[OUTPUT_ZP_I8:%.+]] = builtin.unrealized_conversion_cast [[ARG4]] : ui8 to i8
+ // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extui [[OUTPUT_ZP_I8]] : i8 to i32
+ // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32
+ // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32
+ // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32
+ // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32
+ // CHECK: %c0_i32 = arith.constant 0 : i32
+ // CHECK: %c255_i32 = arith.constant 255 : i32
+ // CHECK: [[MAX:%.+]] = arith.maxsi %c0_i32, [[TMP3]] : i32
+ // CHECK: [[MIN:%.+]] = arith.minsi %c255_i32, [[MAX]] : i32
+ %0 = tosa.rescale %arg0, %arg1, %arg2, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = true, input_unsigned = false, output_unsigned = true} : (tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<1xui8>, tensor<1xui8>) -> tensor<2xui8>
+ return %0 : tensor<2xui8>
+}
+
+// -----
+
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-LABEL: @reverse
diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
index 0b150e9..9c552d8 100644
--- a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
+++ b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir
@@ -14,19 +14,36 @@ gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>)
// CHECK: %[[VAR4:.*]] = arith.addi %[[ARG0]], %[[VAR3]] : i64
// CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR4]] : i64 to !llvm.ptr<1>
// CHECK: %[[VAR6:.*]] = scf.if %[[VAR2]] -> (f16) {
- // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> vector<1xf16>
- // CHECK: %[[VAR8:.*]] = vector.extract %[[VAR7]][0] : f16 from vector<1xf16>
- // CHECK: scf.yield %[[VAR8]] : f16
- // CHECK: } else {
- // CHECK: %[[CST_0:.*]] = arith.constant dense<0.000000e+00> : vector<1xf16>
- // CHECK: %[[VAR7:.*]] = vector.extract %[[CST_0]][0] : f16 from vector<1xf16>
+ // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> f16
// CHECK: scf.yield %[[VAR7]] : f16
+ // CHECK: } else {
+ // CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f16
+ // CHECK: scf.yield %[[CST_0]] : f16
// CHECK: }
%3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
: i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
gpu.return
}
}
+
+// -----
+gpu.module @test {
+// CHECK-LABEL: @source_materialize_single_elem_vec
+// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>, %[[ARG2:.*]]: memref<1xf16>
+gpu.func @source_materialize_single_elem_vec(%src: i64, %offset: vector<1xindex>, %dst: memref<1xf16>) {
+ %1 = arith.constant dense<1>: vector<1xi1>
+ %3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+ : i64, vector<1xindex>, vector<1xi1> -> vector<1xf16>
+ // CHECK: %[[VAR_IF:.*]] = scf.if
+ // CHECK: %[[VAR_RET:.*]] = vector.broadcast %[[VAR_IF]] : f16 to vector<1xf16>
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: vector.store %[[VAR_RET]], %[[ARG2]][%[[C0]]] : memref<1xf16>, vector<1xf16>
+ %c0 = arith.constant 0 : index
+ vector.store %3, %dst[%c0] : memref<1xf16>, vector<1xf16>
+ gpu.return
+}
+}
+
// -----
gpu.module @test {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
index c58b153..21b508e 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
@@ -65,13 +65,13 @@ func.func @main(%t: tensor<?xf32>, %sz: index, %idx: index) -> (f32, f32) {
// -----
-func.func @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> {
+func.func private @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> {
func.return %A : tensor<?xf32>
}
-// CHECK-LABEL: func @return_arg
+// CHECK-LABEL: func private @return_arg
// CHECK-SAME: %[[A:.*]]: memref<?xf32
// CHECK-NOT: return %[[A]]
-// NO-DROP-LABEL: func @return_arg
+// NO-DROP-LABEL: func private @return_arg
// NO-DROP-SAME: %[[A:.*]]: memref<?xf32
// NO-DROP: return %[[A]]
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
index 6054a61..d5f834b 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
@@ -171,9 +171,9 @@ func.func @func_without_tensor_args(%v : vector<10xf32>) -> () {
// Bufferization of a function that is reading and writing. %t0 is writable, so
// no copy should be inserted.
-// CHECK-LABEL: func @inner_func(
+// CHECK-LABEL: func private @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
+func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-NOT: copy
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
@@ -186,9 +186,9 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
return %0, %1 : tensor<?xf32>, f32
}
-// CHECK-LABEL: func @call_func_with_non_tensor_return(
+// CHECK-LABEL: func private @call_func_with_non_tensor_return(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @call_func_with_non_tensor_return(
+func.func private @call_func_with_non_tensor_return(
%t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) {
// CHECK-NOT: alloc
// CHECK-NOT: copy
@@ -203,9 +203,9 @@ func.func @call_func_with_non_tensor_return(
// Bufferization of a function that is reading and writing. %t0 is not writable,
// so a copy is needed.
-// CHECK-LABEL: func @inner_func(
+// CHECK-LABEL: func private @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
+func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) {
// CHECK-NOT: copy
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
@@ -276,10 +276,10 @@ func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) {
// This function does not read, just write. We need an alloc, but no copy.
-// CHECK-LABEL: func @does_not_read(
+// CHECK-LABEL: func private @does_not_read(
// CHECK-NOT: alloc
// CHECK-NOT: copy
-func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
+func.func private @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> {
%f0 = arith.constant 0.0 : f32
%r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32>
return %r : tensor<?xf32>
@@ -354,9 +354,9 @@ func.func @main() {
// A write inside an scf.execute_region. An equivalent tensor is yielded.
-// CHECK-LABEL: func @execute_region_test(
+// CHECK-LABEL: func private @execute_region_test(
// CHECK-SAME: %[[m1:.*]]: memref<?xf32
-func.func @execute_region_test(%t1 : tensor<?xf32>)
+func.func private @execute_region_test(%t1 : tensor<?xf32>)
-> (f32, tensor<?xf32>, f32)
{
%f1 = arith.constant 0.0 : f32
@@ -397,11 +397,11 @@ func.func @no_inline_execute_region_not_canonicalized() {
// CHECK: func private @some_external_func(memref<?xf32, strided<[?], offset: ?>>)
func.func private @some_external_func(tensor<?xf32>)
-// CHECK: func @scf_for_with_tensor_insert_slice(
+// CHECK: func private @scf_for_with_tensor_insert_slice(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
-func.func @scf_for_with_tensor_insert_slice(
+func.func private @scf_for_with_tensor_insert_slice(
%A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>,
%lb : index, %ub : index, %step : index)
-> (tensor<?xf32>, tensor<?xf32>)
@@ -456,11 +456,11 @@ func.func @bar(
// -----
-// CHECK: func @init_and_dot(
+// CHECK: func private @init_and_dot(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<f32, strided<[], offset: ?>>
-func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
+func.func private @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> {
// CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32
%v0 = arith.constant 0.0 : f32
@@ -574,9 +574,9 @@ func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i
// No alloc or copy inside of the loop.
-// CHECK-LABEL: func @inner_func(
+// CHECK-LABEL: func private @inner_func(
// CHECK-SAME: %[[arg0:.*]]: memref<?xf32
-func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
+func.func private @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
%f = arith.constant 1.0 : f32
%c0 = arith.constant 0 : index
// CHECK: memref.store %{{.*}}, %[[arg0]]
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
index e2ab876..b52612d 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir
@@ -24,10 +24,46 @@
// CHECK-NOT: copy
// CHECK: %[[call:.*]]:2 = call @inner_func(%[[arg0]])
%0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32)
- // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32,{{.*}}>
+ // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32{{.*}}>
return %1, %0 : f32, tensor<?xf32>
}
"test.finish" () : () -> ()
}) : () -> ()
+// -----
+#enc1 = #test.tensor_encoding<"hello">
+#enc2 = #test.tensor_encoding<"not hello">
+
+"test.symbol_scope_isolated"() ({
+ // CHECK: func @inner_func(
+ // CHECK-SAME: %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>)
+ // CHECK-SAME: -> memref<?xf32, #test.memref_layout<"hello">>
+ func.func @inner_func(%t: tensor<?xf32, #enc1>)
+ -> tensor<?xf32, #enc1> {
+ // CHECK: return %[[arg0]]
+ return %t : tensor<?xf32, #enc1>
+ }
+
+ // CHECK: func @outer_func(
+ // CHECK-SAME: %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>)
+ // CHECK-SAME: -> (memref<?xf32, #test.memref_layout<"hello">>,
+ // CHECK-SAME: memref<?xf32, #test.memref_layout<"not hello">>)
+ func.func @outer_func(%t0: tensor<?xf32, #enc1>)
+ -> (tensor<?xf32, #enc1>, tensor<?xf32, #enc2>) {
+ // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]])
+ %0 = call @inner_func(%t0)
+ : (tensor<?xf32, #enc1>) -> (tensor<?xf32, #enc1>)
+
+ // CHECK: %[[local:.*]] = "test.create_memref_op"() : ()
+ // CHECK-SAME: -> memref<?xf32, #test.memref_layout<"not hello">>
+ %local = "test.create_tensor_op"() : () -> tensor<?xf32, #enc2>
+ // CHECK: %[[dummy:.*]] = "test.dummy_memref_op"(%[[local]])
+ %1 = "test.dummy_tensor_op"(%local) : (tensor<?xf32, #enc2>)
+ -> tensor<?xf32, #enc2>
+
+ // CHECK: return %[[call]], %[[dummy]]
+ return %0, %1 : tensor<?xf32, #enc1>, tensor<?xf32, #enc2>
+ }
+ "test.finish" () : () -> ()
+}) : () -> ()
diff --git a/mlir/test/Dialect/LLVMIR/bytecode.mlir b/mlir/test/Dialect/LLVMIR/bytecode.mlir
new file mode 100644
index 0000000..821b0ac
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/bytecode.mlir
@@ -0,0 +1,35 @@
+// RUN: mlir-opt -verify-roundtrip %s
+
+#access_group = #llvm.access_group<id = distinct[0]<>>
+#access_group1 = #llvm.access_group<id = distinct[1]<>>
+#di_subprogram = #llvm.di_subprogram<recId = distinct[2]<>>
+#loc1 = loc("test.f90":12:14)
+#loc2 = loc("test":4:3)
+#loc6 = loc(fused<#di_subprogram>[#loc1])
+#loc7 = loc(fused<#di_subprogram>[#loc2])
+#loop_annotation = #llvm.loop_annotation<disableNonforced = false, mustProgress = true, startLoc = #loc6, endLoc = #loc7, parallelAccesses = #access_group, #access_group1>
+module {
+ llvm.func @imp_fn() {
+ llvm.return loc(#loc2)
+ } loc(#loc8)
+ llvm.func @loop_annotation_with_locs() {
+ llvm.br ^bb1 {loop_annotation = #loop_annotation} loc(#loc4)
+ ^bb1: // pred: ^bb0
+ llvm.return loc(#loc5)
+ } loc(#loc3)
+} loc(#loc)
+#di_file = #llvm.di_file<"test.f90" in "">
+#di_subroutine_type = #llvm.di_subroutine_type<callingConvention = DW_CC_program>
+#loc = loc("test":0:0)
+#loc3 = loc("test-path":36:3)
+#loc4 = loc("test-path":37:5)
+#loc5 = loc("test-path":39:5)
+#di_compile_unit = #llvm.di_compile_unit<id = distinct[3]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full>
+#di_compile_unit1 = #llvm.di_compile_unit<id = distinct[4]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full>
+#di_compile_unit2 = #llvm.di_compile_unit<id = distinct[5]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full>
+#di_module = #llvm.di_module<file = #di_file, scope = #di_compile_unit1, name = "mod1">
+#di_module1 = #llvm.di_module<file = #di_file, scope = #di_compile_unit2, name = "mod2">
+#di_imported_entity = #llvm.di_imported_entity<tag = DW_TAG_imported_module, scope = #di_subprogram, entity = #di_module, file = #di_file, line = 1>
+#di_imported_entity1 = #llvm.di_imported_entity<tag = DW_TAG_imported_module, scope = #di_subprogram, entity = #di_module1, file = #di_file, line = 1>
+#di_subprogram1 = #llvm.di_subprogram<recId = distinct[2]<>, id = distinct[6]<>, compileUnit = #di_compile_unit, scope = #di_file, name = "imp_fn", file = #di_file, subprogramFlags = Definition, type = #di_subroutine_type, retainedNodes = #di_imported_entity, #di_imported_entity1>
+#loc8 = loc(fused<#di_subprogram1>[#loc1])
diff --git a/mlir/test/Dialect/LLVMIR/debuginfo.mlir b/mlir/test/Dialect/LLVMIR/debuginfo.mlir
index 1834b0a..d7bf99b 100644
--- a/mlir/test/Dialect/LLVMIR/debuginfo.mlir
+++ b/mlir/test/Dialect/LLVMIR/debuginfo.mlir
@@ -1,4 +1,5 @@
// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s
// CHECK-DAG: #[[FILE:.*]] = #llvm.di_file<"debuginfo.mlir" in "/test/">
#file = #llvm.di_file<"debuginfo.mlir" in "/test/">
diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
index 7344797..00e763a 100644
--- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir
+++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt -verify-roundtrip %s
// CHECK-LABEL: func @baz
@@ -757,7 +757,7 @@ llvm.func @stackrestore(%arg0: !llvm.ptr) {
// CHECK-LABEL: @experimental_noalias_scope_decl
llvm.func @experimental_noalias_scope_decl() {
- // CHECK: llvm.intr.experimental.noalias.scope.decl #{{.*}}
+ // CHECK: llvm.intr.experimental.noalias.scope.decl #alias_scope{{.*}}
llvm.intr.experimental.noalias.scope.decl #alias_scope
llvm.return
}
@@ -767,7 +767,7 @@ llvm.func @experimental_noalias_scope_decl() {
// CHECK-LABEL: @experimental_noalias_scope_with_string_id
llvm.func @experimental_noalias_scope_with_string_id() {
- // CHECK: llvm.intr.experimental.noalias.scope.decl #{{.*}}
+ // CHECK: llvm.intr.experimental.noalias.scope.decl #alias_scope{{.*}}
llvm.intr.experimental.noalias.scope.decl #alias_scope2
llvm.return
}
diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
index 618ba34..66cae5c 100644
--- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
+++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
@@ -1011,6 +1011,20 @@ module attributes { transform.target_tag = "start_here" } {
} -> tensor<1x1x4xf32>
return
}
+
+ func.func @generic_none(%arg0: tensor<128x128xi32>, %arg1: tensor<128x128xi32>, %arg2: tensor<128x128xi32>) {
+ %0 = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
+ affine_map<(d0, d1, d2) -> (d2, d1)>,
+ affine_map<(d0, d1, d2) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel", "reduction"]}
+ ins(%arg0, %arg1 : tensor<128x128xi32>, tensor<128x128xi32>)
+ outs(%arg2 : tensor<128x128xi32>) {
+ ^bb0(%in: i32, %in_0: i32, %out: i32):
+ linalg.yield %out : i32
+ } -> tensor<128x128xi32>
+ return
+ }
}
// -----
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
index 9616a3e..1df15e8 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -10,10 +10,10 @@
// TODO: Some test cases from this file should be moved to other dialects.
-// CHECK-LABEL: func @fill_inplace(
+// CHECK-LABEL: func private @fill_inplace(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
-// CHECK-NO-LAYOUT-MAP-LABEL: func @fill_inplace(%{{.*}}: memref<?xf32>) {
-func.func @fill_inplace(
+// CHECK-NO-LAYOUT-MAP-LABEL: func private @fill_inplace(%{{.*}}: memref<?xf32>) {
+func.func private @fill_inplace(
%A : tensor<?xf32> {bufferization.writable = true})
-> tensor<?xf32>
{
@@ -56,10 +56,10 @@ func.func @not_inplace(
// -----
-// CHECK-LABEL: func @not_inplace
+// CHECK-LABEL: func private @not_inplace
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?x?xf32, strided<[?, ?], offset: ?>>) {
-// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?x?xf32>) {
-func.func @not_inplace(
+// CHECK-NO-LAYOUT-MAP-LABEL: func private @not_inplace(%{{.*}}: memref<?x?xf32>) {
+func.func private @not_inplace(
%A : tensor<?x?xf32> {bufferization.writable = true})
-> tensor<?x?xf32>
{
@@ -235,7 +235,7 @@ func.func @dominance_violation_bug_1(
// -----
-func.func @gather_like(
+func.func private @gather_like(
%arg0 : tensor<?x?xf32> {bufferization.writable = false},
%arg1 : tensor<?xi32> {bufferization.writable = false},
%arg2 : tensor<?x?xf32> {bufferization.writable = true})
@@ -254,7 +254,7 @@ func.func @gather_like(
} -> tensor<?x?xf32>
return %0 : tensor<?x?xf32>
}
-// CHECK-LABEL: func @gather_like(
+// CHECK-LABEL: func private @gather_like(
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?xf32,
// CHECK-SAME: %[[ARG1:.+]]: memref<?xi32
// CHECK-SAME: %[[ARG2:.+]]: memref<?x?xf32
diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir
new file mode 100644
index 0000000..35355c6
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir
@@ -0,0 +1,102 @@
+// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=firstprivate})" | FileCheck %s
+
+// CHECK: acc.firstprivate.recipe @firstprivate_scalar : memref<f32> init {
+// CHECK: ^bb0(%{{.*}}: memref<f32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32>
+// CHECK: acc.yield %[[ALLOC]] : memref<f32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<f32>, %[[DST:.*]]: memref<f32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<f32> to memref<f32>
+// CHECK: acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar() {
+ %0 = memref.alloca() {test.var = "scalar"} : memref<f32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_static_2d : memref<10x20xf32> init {
+// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<10x20xf32>, %[[DST:.*]]: memref<10x20xf32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x20xf32> to memref<10x20xf32>
+// CHECK: acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_static_2d() {
+ %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_dynamic_2d : memref<?x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>):
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32>
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<?x?xf32>, %[[DST:.*]]: memref<?x?xf32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<?x?xf32> to memref<?x?xf32>
+// CHECK: acc.terminator
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_dynamic_2d(%arg0: index, %arg1: index) {
+ %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_mixed_dims : memref<10x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>):
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<10x?xf32>, %[[DST:.*]]: memref<10x?xf32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x?xf32> to memref<10x?xf32>
+// CHECK: acc.terminator
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_mixed_dims(%arg0: index) {
+ %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.firstprivate.recipe @firstprivate_scalar_int : memref<i32> init {
+// CHECK: ^bb0(%{{.*}}: memref<i32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32>
+// CHECK: acc.yield %[[ALLOC]] : memref<i32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: memref<i32>, %[[DST:.*]]: memref<i32>):
+// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<i32> to memref<i32>
+// CHECK: acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar_int() {
+ %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32>
+ return
+}
+
diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir
new file mode 100644
index 0000000..8403ee8
--- /dev/null
+++ b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir
@@ -0,0 +1,82 @@
+// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=private})" | FileCheck %s
+
+// CHECK: acc.private.recipe @private_scalar : memref<f32> init {
+// CHECK: ^bb0(%{{.*}}: memref<f32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32>
+// CHECK: acc.yield %[[ALLOC]] : memref<f32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar() {
+ %0 = memref.alloca() {test.var = "scalar"} : memref<f32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_static_2d : memref<10x20xf32> init {
+// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_static_2d() {
+ %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_dynamic_2d : memref<?x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>):
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32>
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_dynamic_2d(%arg0: index, %arg1: index) {
+ %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_mixed_dims : memref<10x?xf32> init {
+// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>):
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32>
+// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32>
+// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>):
+// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32>
+// CHECK: acc.terminator
+// CHECK: }
+
+func.func @test_mixed_dims(%arg0: index) {
+ %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32>
+ return
+}
+
+// -----
+
+// CHECK: acc.private.recipe @private_scalar_int : memref<i32> init {
+// CHECK: ^bb0(%{{.*}}: memref<i32>):
+// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32>
+// CHECK: acc.yield %[[ALLOC]] : memref<i32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar_int() {
+ %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32>
+ return
+}
+
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
index a1067ec..af09dc8 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -8,11 +8,11 @@
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
-// CHECK-LABEL: func @scf_for_yield_only(
+// CHECK-LABEL: func private @scf_for_yield_only(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: ) -> memref<?xf32> {
-func.func @scf_for_yield_only(
+func.func private @scf_for_yield_only(
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%lb : index, %ub : index, %step : index)
@@ -85,11 +85,11 @@ func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true},
// -----
-// CHECK-LABEL: func @scf_for_with_tensor.insert_slice
+// CHECK-LABEL: func private @scf_for_with_tensor.insert_slice
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>
// CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
-func.func @scf_for_with_tensor.insert_slice(
+func.func private @scf_for_with_tensor.insert_slice(
%A : tensor<?xf32> {bufferization.writable = false},
%B : tensor<?xf32> {bufferization.writable = true},
%C : tensor<4xf32> {bufferization.writable = false},
@@ -471,11 +471,11 @@ func.func @scf_while_iter_arg_result_mismatch(%arg0: tensor<5xi1>,
// -----
-// CHECK-LABEL: func.func @parallel_insert_slice_no_conflict(
+// CHECK-LABEL: func private @parallel_insert_slice_no_conflict(
// CHECK-SAME: %[[idx:.*]]: index, %[[idx2:.*]]: index,
// CHECK-SAME: %[[arg1:.*]]: memref<?xf32, strided{{.*}}>,
// CHECK-SAME: %[[arg2:.*]]: memref<?xf32, strided{{.*}}>
-func.func @parallel_insert_slice_no_conflict(
+func.func private @parallel_insert_slice_no_conflict(
%idx: index,
%idx2: index,
%arg1: tensor<?xf32> {bufferization.writable = true},
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
index 5f95da2..b6c72be 100644
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -8,12 +8,12 @@
// Test bufferization using memref types that have no layout map.
// RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null
-// CHECK-LABEL: func @insert_slice_fun
+// CHECK-LABEL: func private @insert_slice_fun
// CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>,
// CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>
-func.func @insert_slice_fun(
+func.func private @insert_slice_fun(
%A0 : tensor<?xf32> {bufferization.writable = false},
%A1 : tensor<?xf32> {bufferization.writable = true},
%t0 : tensor<4xf32> {bufferization.writable = false},
@@ -331,12 +331,12 @@ func.func @dim_not_reading(%t: tensor<?xf32>, %f: f32, %pos: index)
// -----
// CHECK: #[[$map:.*]] = affine_map<(d0) -> (d0 + 5)>
-// CHECK-LABEL: func.func @cast_retains_buffer_layout(
+// CHECK-LABEL: func.func private @cast_retains_buffer_layout(
// CHECK-SAME: %[[t:.*]]: memref<?xf32, #[[$map]]>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> {
// CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, #[[$map]]> to memref<10xf32, #[[$map]]>
// CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, #[[$map]]> to memref<?xf32, strided<[1], offset: 7>>
// CHECK: return %[[slice]]
-func.func @cast_retains_buffer_layout(
+func.func private @cast_retains_buffer_layout(
%t: tensor<?xf32>
{bufferization.buffer_layout = affine_map<(d0) -> (d0 + 5)>},
%sz: index)
@@ -353,12 +353,12 @@ func.func @cast_retains_buffer_layout(
// -----
-// CHECK-LABEL: func.func @cast_retains_buffer_layout_strided(
+// CHECK-LABEL: func private @cast_retains_buffer_layout_strided(
// CHECK-SAME: %[[t:.*]]: memref<?xf32, strided<[1], offset: 5>>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> {
// CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, strided<[1], offset: 5>> to memref<10xf32, strided<[1], offset: 5>>
// CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, strided<[1], offset: 5>> to memref<?xf32, strided<[1], offset: 7>>
// CHECK: return %[[slice]]
-func.func @cast_retains_buffer_layout_strided(
+func.func private @cast_retains_buffer_layout_strided(
%t: tensor<?xf32>
{bufferization.buffer_layout = strided<[1], offset: 5>},
%sz: index)
diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir
new file mode 100644
index 0000000..023a0e5
--- /dev/null
+++ b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir
@@ -0,0 +1,311 @@
+// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file | FileCheck %s
+
+///===----------------------------------------------===//
+/// Tests of `StepCompareFolder`
+///===----------------------------------------------===//
+
+
+///===------------------------------------===//
+/// Tests of `ugt` (unsigned greater than)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ugt_constant_3_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_3_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 3 > [0, 1, 2] => [true, true, true] => true for all indices => fold
+ %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ugt_constant_2_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ugt_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 2 > [0, 1, 2] => [true, true, false] => not same for all indices => don't fold
+ %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_3_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_3_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] > 3 => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_max_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_max_rhs() -> vector<3xi1> {
+  // The largest signed i64 value:
+ %cst = arith.constant dense<0x7fffffffffffffff> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
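+  // [0, 1, 2] > 0x7fffffffffffffff => [false, false, false] => false for all indices => fold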
+ %1 = arith.cmpi ugt, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+
+// -----
+
+// CHECK-LABEL: @ugt_constant_2_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ugt_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] > 2 => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi ugt, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ugt_constant_1_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ugt_constant_1_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] > 1 => [false, false, true] => not same for all indices => don't fold
+ %1 = arith.cmpi ugt, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `uge` (unsigned greater than or equal)
+///===------------------------------------===//
+
+
+// CHECK-LABEL: @uge_constant_2_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @uge_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 2 >= [0, 1, 2] => [true, true, true] => true for all indices => fold
+ %1 = arith.cmpi uge, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_uge_constant_1_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_uge_constant_1_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+  // 1 >= [0, 1, 2] => [true, true, false] => not same for all indices => don't fold
+ %1 = arith.cmpi uge, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @uge_constant_3_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @uge_constant_3_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] >= 3 => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi uge, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_uge_constant_2_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_uge_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] >= 2 => [false, false, true] => not same for all indices => don't fold
+ %1 = arith.cmpi uge, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+
+///===------------------------------------===//
+/// Tests of `ult` (unsigned less than)
+///===------------------------------------===//
+
+
+// CHECK-LABEL: @ult_constant_2_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ult_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 2 < [0, 1, 2] => [false, false, false] => false for all indices => fold
+ %1 = arith.cmpi ult, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ult_constant_1_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ult_constant_1_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // 1 < [0, 1, 2] => [false, false, true] => not same for all indices => don't fold
+ %1 = arith.cmpi ult, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ult_constant_3_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ult_constant_3_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] < 3 => [true, true, true] => true for all indices => fold
+ %1 = arith.cmpi ult, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ult_constant_2_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ult_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
+ // [0, 1, 2] < 2 => [true, true, false] => not same for all indices => don't fold
+ %1 = arith.cmpi ult, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `ule` (unsigned less than or equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ule_constant_3_lhs
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ule_constant_3_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
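+  // 3 <= [0, 1, 2] => [false, false, false] => false for all indices => fold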
+ %1 = arith.cmpi ule, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ule_constant_2_lhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ule_constant_2_lhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
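+  // 2 <= [0, 1, 2] => [false, false, true] => not same for all indices => don't fold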
+ %1 = arith.cmpi ule, %cst, %0 : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @ule_constant_2_rhs
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ule_constant_2_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
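+  // [0, 1, 2] <= 2 => [true, true, true] => true for all indices => fold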
+ %1 = arith.cmpi ule, %0, %cst : vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ule_constant_1_rhs
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ule_constant_1_rhs() -> vector<3xi1> {
+ %cst = arith.constant dense<1> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
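+  // [0, 1, 2] <= 1 => [true, true, false] => not same for all indices => don't fold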
+ %1 = arith.cmpi ule, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `eq` (equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @eq_constant_3
+// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @eq_constant_3() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
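+  // [0, 1, 2] == 3 => [false, false, false] => false for all indices => fold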
+ %1 = arith.cmpi eq, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_eq_constant_2
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_eq_constant_2() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
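+  // [0, 1, 2] == 2 => [false, false, true] => not same for all indices => don't fold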
+ %1 = arith.cmpi eq, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+///===------------------------------------===//
+/// Tests of `ne` (not equal)
+///===------------------------------------===//
+
+// CHECK-LABEL: @ne_constant_3
+// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1>
+// CHECK: return %[[CST]] : vector<3xi1>
+func.func @ne_constant_3() -> vector<3xi1> {
+ %cst = arith.constant dense<3> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
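+  // [0, 1, 2] != 3 => [true, true, true] => true for all indices => fold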
+ %1 = arith.cmpi ne, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: @negative_ne_constant_2
+// CHECK: %[[CMP:.*]] = arith.cmpi
+// CHECK: return %[[CMP]]
+func.func @negative_ne_constant_2() -> vector<3xi1> {
+ %cst = arith.constant dense<2> : vector<3xindex>
+ %0 = vector.step : vector<3xindex>
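+  // [0, 1, 2] != 2 => [true, true, false] => not same for all indices => don't fold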
+ %1 = arith.cmpi ne, %0, %cst: vector<3xindex>
+ return %1 : vector<3xi1>
+}
+
diff --git a/mlir/test/Dialect/Vector/vector-unroll-options.mlir b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
index 35db14e..e5a98b5 100644
--- a/mlir/test/Dialect/Vector/vector-unroll-options.mlir
+++ b/mlir/test/Dialect/Vector/vector-unroll-options.mlir
@@ -188,15 +188,38 @@ func.func @vector_fma(%a: vector<4x4xf32>, %b: vector<4x4xf32>, %c: vector<4x4xf
// CHECK-LABEL: func @vector_fma
// CHECK-COUNT-4: vector.fma %{{.+}}, %{{.+}}, %{{.+}} : vector<2x2xf32>
-// TODO: We should be able to unroll this like the example above - this will require extending UnrollElementwisePattern.
-func.func @negative_vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{
+func.func @vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{
%0 = vector.fma %a, %a, %a : vector<3x2x2xf32>
return %0 : vector<3x2x2xf32>
}
-// CHECK-LABEL: func @negative_vector_fma_3d
-// CHECK-NOT: vector.extract_strided_slice
-// CHECK: %[[R0:.*]] = vector.fma %{{.+}} : vector<3x2x2xf32>
-// CHECK: return
+// CHECK-LABEL: func @vector_fma_3d
+// CHECK-SAME: (%[[SRC:.*]]: vector<3x2x2xf32>) -> vector<3x2x2xf32> {
+// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<3x2x2xf32>
+// CHECK: %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_OUT_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_OUT_0:.*]] = vector.shape_cast %[[E_OUT_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[FMA0:.*]] = vector.fma %[[S_LHS_0]], %[[S_RHS_0]], %[[S_OUT_0]] : vector<2x2xf32>
+// CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[FMA0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+// CHECK: %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_OUT_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_OUT_1:.*]] = vector.shape_cast %[[E_OUT_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[FMA1:.*]] = vector.fma %[[S_LHS_1]], %[[S_RHS_1]], %[[S_OUT_1]] : vector<2x2xf32>
+// CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[FMA1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+// CHECK: %[[E_LHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_2:.*]] = vector.shape_cast %[[E_LHS_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_2:.*]] = vector.shape_cast %[[E_RHS_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_OUT_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_OUT_2:.*]] = vector.shape_cast %[[E_OUT_2]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[FMA2:.*]] = vector.fma %[[S_LHS_2]], %[[S_RHS_2]], %[[S_OUT_2]] : vector<2x2xf32>
+// CHECK: %[[I2:.*]] = vector.insert_strided_slice %[[FMA2]], %[[I1]] {offsets = [2, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32>
+// CHECK: return %[[I2]] : vector<3x2x2xf32>
func.func @vector_multi_reduction(%v : vector<4x6xf32>, %acc: vector<4xf32>) -> vector<4xf32> {
%0 = vector.multi_reduction #vector.kind<add>, %v, %acc [1] : vector<4x6xf32> to vector<4xf32>
@@ -440,3 +463,36 @@ func.func @vector_step() -> vector<32xindex> {
// CHECK: %[[ADD3:.*]] = arith.addi %[[STEP]], %[[CST]] : vector<8xindex>
// CHECK: %[[INS3:.*]] = vector.insert_strided_slice %[[ADD3]], %[[INS2]] {offsets = [24], strides = [1]} : vector<8xindex> into vector<32xindex>
// CHECK: return %[[INS3]] : vector<32xindex>
+
+
+func.func @elementwise_3D_to_2D(%v1: vector<2x2x2xf32>, %v2: vector<2x2x2xf32>) -> vector<2x2x2xf32> {
+ %0 = arith.addf %v1, %v2 : vector<2x2x2xf32>
+ return %0 : vector<2x2x2xf32>
+}
+// CHECK-LABEL: func @elementwise_3D_to_2D
+// CHECK-SAME: (%[[ARG0:.*]]: vector<2x2x2xf32>, %[[ARG1:.*]]: vector<2x2x2xf32>) -> vector<2x2x2xf32> {
+// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<2x2x2xf32>
+// CHECK: %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[ADD0:.*]] = arith.addf %[[S_LHS_0]], %[[S_RHS_0]] : vector<2x2xf32>
+// CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[ADD0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32>
+// CHECK: %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32>
+// CHECK: %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32>
+// CHECK: %[[ADD1:.*]] = arith.addf %[[S_LHS_1]], %[[S_RHS_1]] : vector<2x2xf32>
+// CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[ADD1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32>
+// CHECK: return %[[I1]] : vector<2x2x2xf32>
+
+
+func.func @elementwise_4D_to_2D(%v1: vector<2x2x2x2xf32>, %v2: vector<2x2x2x2xf32>) -> vector<2x2x2x2xf32> {
+ %0 = arith.addf %v1, %v2 : vector<2x2x2x2xf32>
+ return %0 : vector<2x2x2x2xf32>
+}
+
+// CHECK-LABEL: func @elementwise_4D_to_2D
+// CHECK-COUNT-4: arith.addf %{{.*}}, %{{.*}} : vector<2x2xf32>
+// CHECK-NOT: arith.addf
+// CHECK: return
diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
index bb76392..401cdd29 100644
--- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
+++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir
@@ -1925,3 +1925,22 @@ func.func @warp_scf_if_distribute(%pred : i1) {
// CHECK-PROP: "some_use"(%[[IF_YIELD_DIST]]) : (vector<1xf32>) -> ()
// CHECK-PROP: return
// CHECK-PROP: }
+
+// -----
+func.func @dedup_unused_result(%laneid : index) -> (vector<1xf32>) {
+ %r:3 = gpu.warp_execute_on_lane_0(%laneid)[32] ->
+ (vector<1xf32>, vector<2xf32>, vector<1xf32>) {
+ %2 = "some_def"() : () -> (vector<32xf32>)
+ %3 = "some_def"() : () -> (vector<64xf32>)
+ gpu.yield %2, %3, %2 : vector<32xf32>, vector<64xf32>, vector<32xf32>
+ }
+ %r0 = "some_use"(%r#2, %r#2) : (vector<1xf32>, vector<1xf32>) -> (vector<1xf32>)
+ return %r0 : vector<1xf32>
+}
+
+// CHECK-PROP: func @dedup_unused_result
+// CHECK-PROP: %[[R:.*]] = gpu.warp_execute_on_lane_0(%arg0)[32] -> (vector<1xf32>)
+// CHECK-PROP: %[[Y0:.*]] = "some_def"() : () -> vector<32xf32>
+// CHECK-PROP: %[[Y1:.*]] = "some_def"() : () -> vector<64xf32>
+// CHECK-PROP: gpu.yield %[[Y0]] : vector<32xf32>
+// CHECK-PROP: "some_use"(%[[R]], %[[R]]) : (vector<1xf32>, vector<1xf32>) -> vector<1xf32>
diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
index 9d63c2d..fe4f44c 100644
--- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
+++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir
@@ -584,3 +584,101 @@ gpu.module @test_kernel {
gpu.return
}
}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: load_with_offsets
+ // CHECK-COUNT-2: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16xf32>
+ gpu.func @load_with_offsets(%src: ui64) -> vector<32xf32> {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+ %ld = xegpu.load %src[%cst], %mask {chunk_size = 1, layout_result_0 = #xegpu.layout<inst_data = [16]>, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32xf32>
+
+ gpu.return %ld : vector<32xf32>
+ }
+}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: store_with_offsets
+ // CHECK-COUNT-2: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16xf32>, ui64, vector<16xindex>, vector<16xi1>
+ gpu.func @store_with_offsets(%src: ui64) {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+
+ %st_vec = arith.constant dense<1023.0>: vector<32xf32>
+ xegpu.store %st_vec, %src[%cst], %mask {chunk_size = 1, layout_operand_0 = #xegpu.layout<inst_data = [16]>,
+ layout_operand_2 = #xegpu.layout<inst_data = [16]>,
+ layout_operand_3 = #xegpu.layout<inst_data = [16]>,
+ l1_hint = #xegpu.cache_hint<cached>} : vector<32xf32>, ui64, vector<32xindex>, vector<32xi1>
+
+ gpu.return
+ }
+}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: load_with_offsets_chunk
+ // CHECK: [[cst:%.+]] = arith.constant dense<0.000000e+00> : vector<32x4xf32>
+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
+ // CHECK-COUNT-4: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16x2xf32>
+ gpu.func @load_with_offsets_chunk(%src: ui64) -> vector<32x4xf32> {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+ %ld = xegpu.load %src[%cst], %mask {chunk_size = 4, layout_result_0 = #xegpu.layout<inst_data = [16, 2]>, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32x4xf32>
+ gpu.return %ld : vector<32x4xf32>
+ }
+}
+
+// -----
+gpu.module @test_kernel {
+ // CHECK-LABEL: store_with_offsets_chunk
+  // CHECK: [[cst:%.+]] = arith.constant dense<1.023000e+03> : vector<16x2xf32>
+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
+ // CHECK-COUNT-4: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16x2xf32>, ui64, vector<16xindex>, vector<16xi1>
+ gpu.func @store_with_offsets_chunk(%src: ui64) {
+ %cst = arith.constant dense<[
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 64, 72, 80, 88, 96, 104, 112, 120,
+ 128, 136, 144, 152, 160, 168, 176, 184,
+ 192, 200, 208, 216, 224, 232, 240, 248
+ ]> : vector<32xindex>
+
+ %c17 = arith.constant 17: index
+ %mask = vector.create_mask %c17: vector<32xi1>
+
+ %st_vec = arith.constant dense<1023.>: vector<32x4xf32>
+ xegpu.store %st_vec, %src[%cst], %mask {chunk_size = 4, layout_operand_0 = #xegpu.layout<inst_data = [16, 2]>,
+ layout_operand_2 = #xegpu.layout<inst_data = [16, 2]>,
+ layout_operand_3 = #xegpu.layout<inst_data = [16, 2]>,
+ l1_hint = #xegpu.cache_hint<cached>} : vector<32x4xf32>, ui64, vector<32xindex>, vector<32xi1>
+ gpu.return
+ }
+}
diff --git a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir
index cb16c37..b3154d4 100644
--- a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir
+++ b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir
@@ -3,7 +3,16 @@
// RUN: | FileCheck %s
// CHECK: data =
-// CHECK-RAW: [[[7.7, 0, 0], [7.7, 0, 0], [7.7, 0, 0]], [[0, 7.7, 0], [0, 7.7, 0], [0, 7.7, 0]], [[0, 0, 7.7], [0, 0, 7.7], [0, 0, 7.7]]]
+// CHECK{LITERAL}: [[[7.7, 0, 0],
+// CHECK{LITERAL}: [7.7, 0, 0],
+// CHECK{LITERAL}: [7.7, 0, 0]],
+// CHECK{LITERAL}: [[0, 7.7, 0],
+// CHECK{LITERAL}: [0, 7.7, 0],
+// CHECK{LITERAL}: [0, 7.7, 0]],
+// CHECK{LITERAL}: [[0, 0, 7.7],
+// CHECK{LITERAL}: [0, 0, 7.7],
+// CHECK{LITERAL}: [0, 0, 7.7]]]
+
module attributes {
gpu.container_module,
spirv.target_env = #spirv.target_env<
diff --git a/mlir/test/Pass/remark-final.mlir b/mlir/test/Pass/remark-final.mlir
new file mode 100644
index 0000000..325271e
--- /dev/null
+++ b/mlir/test/Pass/remark-final.mlir
@@ -0,0 +1,17 @@
+// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final 2>&1 | FileCheck %s
+// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final --remark-format=yaml --remarks-output-file=%t.yaml
+// RUN: FileCheck --check-prefix=CHECK-YAML %s < %t.yaml
+module @foo {
+ "test.op"() : () -> ()
+
+}
+
+// CHECK-YAML-NOT: This is a test passed remark (should be dropped)
+// CHECK-YAML-DAG: !Analysis
+// CHECK-YAML-DAG: !Failure
+// CHECK-YAML-DAG: !Passed
+
+// CHECK-NOT: This is a test passed remark (should be dropped)
+// CHECK-DAG: remark: [Analysis] test-remark
+// CHECK-DAG: remark: [Failure] test-remark | Category:category-2-failed
+// CHECK-DAG: remark: [Passed] test-remark | Category:category-1-passed
diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll
index e056e43..61376b8 100644
--- a/mlir/test/Target/LLVMIR/Import/debug-info.ll
+++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll
@@ -240,11 +240,10 @@ define void @subprogram() !dbg !3 {
define void @func_loc() !dbg !3 {
ret void
}
-; CHECK-DAG: #[[NAME_LOC:.+]] = loc("func_loc")
; CHECK-DAG: #[[FILE_LOC:.+]] = loc("debug-info.ll":42:0)
; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram<id = distinct[{{.*}}]<>, compileUnit = #{{.*}}, scope = #{{.*}}, name = "func_loc", file = #{{.*}}, line = 42, subprogramFlags = Definition>
-; CHECK: loc(fused<#[[SP]]>[#[[NAME_LOC]], #[[FILE_LOC]]]
+; CHECK: loc(fused<#[[SP]]>[#[[FILE_LOC]]]
!llvm.dbg.cu = !{!1}
!llvm.module.flags = !{!0}
diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir
new file mode 100644
index 0000000..04e2ddf
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir
@@ -0,0 +1,12 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-LABEL: @convert_f32x2_to_f4x2_e2m1
+llvm.func @convert_f32x2_to_f4x2_e2m1(%srcA : f32, %srcB : f32) {
+ // CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.satfinite(float %{{.*}}, float %{{.*}})
+ // CHECK-NEXT: %{{.*}} = trunc i16 %[[res1]] to i8
+ %res1 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB : i8 (f4E2M1FN)
+ // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}})
+ // CHECK-NEXT: %{{.*}} = trunc i16 %[[res2]] to i8
+ %res2 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB {relu = true} : i8 (f4E2M1FN)
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
index 0b36154..b2fe2f5 100644
--- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -254,6 +254,14 @@ llvm.func @nvvm_cvt_f32x2_to_f6x2_invalid_type(%a : f32, %b : f32) {
// -----
+llvm.func @nvvm_cvt_f32x2_to_f4x2_invalid_type(%a : f32, %b : f32) {
+ // expected-error @below {{Only 'f4E2M1FN' type is supported for conversions from f32x2 to f4x2.}}
+ %res = nvvm.convert.f32x2.to.f4x2 %a, %b : i8 (f8E4M3FN)
+ llvm.return
+}
+
+// -----
+
llvm.func @nvvm_prefetch_L1_with_evict_priority(%global_ptr: !llvm.ptr<1>) {
// expected-error @below {{cache eviction priority supported only for cache level L2}}
nvvm.prefetch level = L1, evict_priority = evict_last, %global_ptr : !llvm.ptr<1>
@@ -559,3 +567,13 @@ llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_invalid_return_typ
%res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i1
llvm.return
}
+
+
+// -----
+
+// Test for range validation - invalid range where lower == upper but not at extremes
+func.func @invalid_range_equal_bounds() {
+ // expected-error @below {{invalid range attribute: Lower == Upper, but they aren't min (0) or max (4294967295) value! This is an invalid constant range.}}
+ %0 = nvvm.read.ptx.sreg.warpsize range <i32, 32, 32> : i32
+ return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 00a479d..594ae48 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -152,6 +152,10 @@ llvm.func @nvvm_special_regs() -> i32 {
%74 = nvvm.read.ptx.sreg.lanemask.ge : i32
//CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt
%75 = nvvm.read.ptx.sreg.lanemask.gt : i32
+ // CHECK: %76 = call range(i32 0, 0) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %76 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 0> : i32
+ // CHECK: %77 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %77 = nvvm.read.ptx.sreg.tid.x range <i32, 4294967295, 4294967295> : i32
llvm.return %1 : i32
}
diff --git a/mlir/test/lib/Analysis/CMakeLists.txt b/mlir/test/lib/Analysis/CMakeLists.txt
index 9187998..c37671a 100644
--- a/mlir/test/lib/Analysis/CMakeLists.txt
+++ b/mlir/test/lib/Analysis/CMakeLists.txt
@@ -17,6 +17,7 @@ add_mlir_library(MLIRTestAnalysis
DataFlow/TestDenseForwardDataFlowAnalysis.cpp
DataFlow/TestLivenessAnalysis.cpp
DataFlow/TestSparseBackwardDataFlowAnalysis.cpp
+ DataFlow/TestStridedMetadataRangeAnalysis.cpp
EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp
new file mode 100644
index 0000000..6ac09fd
--- /dev/null
+++ b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp
@@ -0,0 +1,86 @@
+//===- TestStridedMetadataRangeAnalysis.cpp - Test strided md analysis ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h"
+#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h"
+#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h"
+#include "mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h"
+#include "mlir/Analysis/DataFlowFramework.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace mlir;
+using namespace mlir::dataflow;
+
+static void printAnalysisResults(DataFlowSolver &solver, Operation *op,
+ raw_ostream &os) {
+ // Collect the strided metadata of the op results.
+ SmallVector<std::pair<unsigned, const StridedMetadataRangeLattice *>> results;
+ for (OpResult result : op->getResults()) {
+ const auto *state = solver.lookupState<StridedMetadataRangeLattice>(result);
+ // Skip the result if it's uninitialized.
+ if (!state || state->getValue().isUninitialized())
+ continue;
+
+ // Skip the result if the range is empty.
+ const mlir::StridedMetadataRange &md = state->getValue();
+ if (md.getOffsets().empty() && md.getSizes().empty() &&
+ md.getStrides().empty())
+ continue;
+ results.push_back({result.getResultNumber(), state});
+ }
+
+ // Early exit if there's no metadata to print.
+ if (results.empty())
+ return;
+
+ // Print the metadata.
+ os << "Op: " << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "\n";
+ for (auto [idx, state] : results)
+ os << " result[" << idx << "]: " << state->getValue() << "\n";
+ os << "\n";
+}
+
+namespace {
+struct TestStridedMetadataRangeAnalysisPass
+ : public PassWrapper<TestStridedMetadataRangeAnalysisPass,
+ OperationPass<>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
+ TestStridedMetadataRangeAnalysisPass)
+
+ StringRef getArgument() const override {
+ return "test-strided-metadata-range-analysis";
+ }
+ void runOnOperation() override {
+ Operation *op = getOperation();
+
+ DataFlowSolver solver;
+ solver.load<DeadCodeAnalysis>();
+ solver.load<SparseConstantPropagation>();
+ solver.load<IntegerRangeAnalysis>();
+ solver.load<StridedMetadataRangeAnalysis>();
+ if (failed(solver.initializeAndRun(op)))
+ return signalPassFailure();
+
+ op->walk(
+ [&](Operation *op) { printAnalysisResults(solver, op, llvm::errs()); });
+ }
+};
+} // end anonymous namespace
+
+namespace mlir {
+namespace test {
+void registerTestStridedMetadataRangeAnalysisPass() {
+ PassRegistration<TestStridedMetadataRangeAnalysisPass>();
+}
+} // end namespace test
+} // end namespace mlir
diff --git a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
index 1e2d4a7..4069a74 100644
--- a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
+++ b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp
@@ -11,11 +11,25 @@
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h"
#include "mlir/Dialect/Bufferization/Transforms/Transforms.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
+#include "TestAttributes.h" // TestTensorEncodingAttr, TestMemRefLayoutAttr
+#include "TestDialect.h"
+
using namespace mlir;
namespace {
+MemRefLayoutAttrInterface
+getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) {
+ if (auto encoding = dyn_cast_if_present<test::TestTensorEncodingAttr>(
+ tensorType.getEncoding())) {
+ return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get(
+ tensorType.getContext(), encoding.getDummy()));
+ }
+ return {};
+}
+
struct TestOneShotModuleBufferizePass
: public PassWrapper<TestOneShotModuleBufferizePass, OperationPass<>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestOneShotModuleBufferizePass)
@@ -25,6 +39,7 @@ struct TestOneShotModuleBufferizePass
: PassWrapper(pass) {}
void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert<test::TestDialect>();
registry.insert<bufferization::BufferizationDialect>();
}
StringRef getArgument() const final {
@@ -41,6 +56,17 @@ struct TestOneShotModuleBufferizePass
bufferization::OneShotBufferizationOptions opt;
opt.bufferizeFunctionBoundaries = true;
+ opt.functionArgTypeConverterFn =
+ [&](bufferization::TensorLikeType tensor, Attribute memSpace,
+ func::FuncOp, const bufferization::BufferizationOptions &) {
+ assert(isa<RankedTensorType>(tensor) && "tests only builtin tensors");
+ auto tensorType = cast<RankedTensorType>(tensor);
+ auto layout = getMemRefLayoutForTensorEncoding(tensorType);
+ return cast<bufferization::BufferLikeType>(
+ MemRefType::get(tensorType.getShape(),
+ tensorType.getElementType(), layout, memSpace));
+ };
+
bufferization::BufferizationState bufferizationState;
if (failed(bufferization::runOneShotModuleBufferize(getOperation(), opt,
diff --git a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
index f84055d..1e59338 100644
--- a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt
@@ -1,6 +1,7 @@
add_mlir_library(MLIROpenACCTestPasses
TestOpenACC.cpp
TestPointerLikeTypeInterface.cpp
+ TestRecipePopulate.cpp
EXCLUDE_FROM_LIBMLIR
)
diff --git a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
index 9886240..bea21b9 100644
--- a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
+++ b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp
@@ -15,9 +15,13 @@ namespace test {
// Forward declarations of individual test pass registration functions
void registerTestPointerLikeTypeInterfacePass();
+void registerTestRecipePopulatePass();
// Unified registration function for all OpenACC tests
-void registerTestOpenACC() { registerTestPointerLikeTypeInterfacePass(); }
+void registerTestOpenACC() {
+ registerTestPointerLikeTypeInterfacePass();
+ registerTestRecipePopulatePass();
+}
} // namespace test
} // namespace mlir
diff --git a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
index 85f9283..027b0a1 100644
--- a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
+++ b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp
@@ -196,13 +196,15 @@ void TestPointerLikeTypeInterfacePass::testGenAllocate(
newBuilder.setInsertionPointAfter(op);
// Call the genAllocate API
+ bool needsFree = false;
Value allocRes = pointerType.genAllocate(newBuilder, loc, "test_alloc",
- result.getType(), result);
+ result.getType(), result, needsFree);
if (allocRes) {
llvm::errs() << "Successfully generated alloc for operation: ";
op->print(llvm::errs());
llvm::errs() << "\n";
+ llvm::errs() << "\tneeds free: " << (needsFree ? "true" : "false") << "\n";
// Print all operations that were inserted
for (Operation *insertedOp : tracker.insertedOps) {
@@ -230,8 +232,8 @@ void TestPointerLikeTypeInterfacePass::testGenFree(Operation *op, Value result,
// Call the genFree API
auto typedResult = cast<TypedValue<PointerLikeType>>(result);
- bool success =
- pointerType.genFree(newBuilder, loc, typedResult, result.getType());
+ bool success = pointerType.genFree(newBuilder, loc, typedResult, result,
+ result.getType());
if (success) {
llvm::errs() << "Successfully generated free for operation: ";
diff --git a/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp
new file mode 100644
index 0000000..35f092c
--- /dev/null
+++ b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp
@@ -0,0 +1,110 @@
+//===- TestRecipePopulate.cpp - Test Recipe Population -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains test passes for testing the createAndPopulate methods
+// of the recipe operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace mlir;
+using namespace mlir::acc;
+
+namespace {
+
+struct TestRecipePopulatePass
+ : public PassWrapper<TestRecipePopulatePass, OperationPass<ModuleOp>> {
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestRecipePopulatePass)
+
+ TestRecipePopulatePass() = default;
+ TestRecipePopulatePass(const TestRecipePopulatePass &pass)
+ : PassWrapper(pass) {
+ recipeType = pass.recipeType;
+ }
+
+ Pass::Option<std::string> recipeType{
+ *this, "recipe-type",
+ llvm::cl::desc("Recipe type: private or firstprivate"),
+ llvm::cl::init("private")};
+
+ StringRef getArgument() const override { return "test-acc-recipe-populate"; }
+
+ StringRef getDescription() const override {
+ return "Test OpenACC recipe population";
+ }
+
+ void runOnOperation() override;
+
+ void getDependentDialects(DialectRegistry &registry) const override {
+ registry.insert<acc::OpenACCDialect>();
+ registry.insert<arith::ArithDialect>();
+ registry.insert<memref::MemRefDialect>();
+ }
+};
+
+void TestRecipePopulatePass::runOnOperation() {
+ auto module = getOperation();
+ OpBuilder builder(&getContext());
+
+ // Collect all test variables
+ SmallVector<std::tuple<Operation *, Value, std::string>> testVars;
+
+ module.walk([&](Operation *op) {
+ if (auto varName = op->getAttrOfType<StringAttr>("test.var")) {
+ for (auto result : op->getResults()) {
+ testVars.push_back({op, result, varName.str()});
+ }
+ }
+ });
+
+ // Generate recipes at module level
+ builder.setInsertionPoint(&module.getBodyRegion().front(),
+ module.getBodyRegion().front().begin());
+
+ for (auto [op, var, varName] : testVars) {
+ Location loc = op->getLoc();
+
+ std::string recipeName = recipeType.getValue() + "_" + varName;
+ ValueRange bounds; // No bounds for memref tests
+
+ if (recipeType == "private") {
+ auto recipe = PrivateRecipeOp::createAndPopulate(
+ builder, loc, recipeName, var.getType(), varName, bounds);
+
+ if (!recipe) {
+ op->emitError("Failed to create private recipe for ") << varName;
+ }
+ } else if (recipeType == "firstprivate") {
+ auto recipe = FirstprivateRecipeOp::createAndPopulate(
+ builder, loc, recipeName, var.getType(), varName, bounds);
+
+ if (!recipe) {
+ op->emitError("Failed to create firstprivate recipe for ") << varName;
+ }
+ }
+ }
+}
+
+} // namespace
+
+namespace mlir {
+namespace test {
+
+void registerTestRecipePopulatePass() {
+ PassRegistration<TestRecipePopulatePass>();
+}
+
+} // namespace test
+} // namespace mlir
diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td
index 5685004..9e7e4f8 100644
--- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td
+++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td
@@ -22,6 +22,7 @@ include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/BuiltinAttributeInterfaces.td"
include "mlir/IR/EnumAttr.td"
include "mlir/IR/OpAsmInterface.td"
+include "mlir/IR/TensorEncoding.td"
// All of the attributes will extend this class.
class Test_Attr<string name, list<Trait> traits = []>
@@ -439,4 +440,20 @@ def TestCustomStorageCtorAttr : Test_Attr<"TestCustomStorageCtorAttr"> {
let hasStorageCustomConstructor = 1;
}
+def TestTensorEncodingAttr : Test_Attr<"TestTensorEncoding",
+ [DeclareAttrInterfaceMethods<VerifiableTensorEncoding>]> {
+ let mnemonic = "tensor_encoding";
+
+ let parameters = (ins "mlir::StringAttr":$dummy);
+ let assemblyFormat = "`<` $dummy `>`";
+}
+
+def TestMemRefLayoutAttr : Test_Attr<"TestMemRefLayout",
+ [DeclareAttrInterfaceMethods<MemRefLayoutAttrInterface>]> {
+ let mnemonic = "memref_layout";
+
+ let parameters = (ins "mlir::StringAttr":$dummy);
+ let assemblyFormat = "`<` $dummy `>`";
+}
+
#endif // TEST_ATTRDEFS
diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp
index fe1e916..9db7b01 100644
--- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp
+++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp
@@ -542,6 +542,24 @@ test::detail::TestCustomStorageCtorAttrAttrStorage::construct(
}
//===----------------------------------------------------------------------===//
+// TestTensorEncodingAttr
+//===----------------------------------------------------------------------===//
+
+::llvm::LogicalResult TestTensorEncodingAttr::verifyEncoding(
+ mlir::ArrayRef<int64_t> shape, mlir::Type elementType,
+ llvm::function_ref<::mlir::InFlightDiagnostic()> emitError) const {
+ return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// TestMemRefLayoutAttr
+//===----------------------------------------------------------------------===//
+
+mlir::AffineMap TestMemRefLayoutAttr::getAffineMap() const {
+ return mlir::AffineMap::getMultiDimIdentityMap(1, getContext());
+}
+
+//===----------------------------------------------------------------------===//
// TestDialect
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.h b/mlir/test/lib/Dialect/Test/TestAttributes.h
index 778d84fa..0ad5ab6 100644
--- a/mlir/test/lib/Dialect/Test/TestAttributes.h
+++ b/mlir/test/lib/Dialect/Test/TestAttributes.h
@@ -24,6 +24,7 @@
#include "mlir/IR/Dialect.h"
#include "mlir/IR/DialectImplementation.h"
#include "mlir/IR/DialectResourceBlobManager.h"
+#include "mlir/IR/TensorEncoding.h"
// generated files require above includes to come first
#include "TestAttrInterfaces.h.inc"
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.h b/mlir/test/lib/Dialect/Test/TestDialect.h
index f2adca6..bcf3b55d 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.h
+++ b/mlir/test/lib/Dialect/Test/TestDialect.h
@@ -18,6 +18,7 @@
#include "TestInterfaces.h"
#include "TestTypes.h"
#include "mlir/Bytecode/BytecodeImplementation.h"
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/DLTI/Traits.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.td b/mlir/test/lib/Dialect/Test/TestDialect.td
index 2b5491f..37a263f 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.td
+++ b/mlir/test/lib/Dialect/Test/TestDialect.td
@@ -24,7 +24,10 @@ def Test_Dialect : Dialect {
let useDefaultTypePrinterParser = 0;
let useDefaultAttributePrinterParser = 1;
let isExtensible = 1;
- let dependentDialects = ["::mlir::DLTIDialect"];
+ let dependentDialects = [
+ "::mlir::DLTIDialect",
+ "::mlir::bufferization::BufferizationDialect"
+ ];
let discardableAttrs = (ins
"mlir::IntegerAttr":$discardable_attr_key,
"SimpleAAttr":$other_discardable_attr_key
diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
index 53055fe..b211e24 100644
--- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp
@@ -1425,6 +1425,39 @@ TestMultiSlotAlloca::handleDestructuringComplete(
return createNewMultiAllocaWithoutSlot(slot, builder, *this);
}
+namespace {
+/// Returns test dialect's memref layout for test dialect's tensor encoding when
+/// applicable.
+MemRefLayoutAttrInterface
+getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) {
+ if (auto encoding =
+ dyn_cast<test::TestTensorEncodingAttr>(tensorType.getEncoding())) {
+ return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get(
+ tensorType.getContext(), encoding.getDummy()));
+ }
+ return {};
+}
+
+/// Auxiliary bufferization function for test and builtin tensors.
+bufferization::BufferLikeType
+convertTensorToBuffer(mlir::Operation *op,
+ const bufferization::BufferizationOptions &options,
+ bufferization::TensorLikeType tensorLike) {
+ auto buffer =
+ *tensorLike.getBufferType(options, [&]() { return op->emitError(); });
+ if (auto memref = dyn_cast<MemRefType>(buffer)) {
+ // Note: For the sake of testing, we want to ensure that encoding -> layout
+ // bufferization happens. This is currently achieved manually.
+ auto layout =
+ getMemRefLayoutForTensorEncoding(cast<RankedTensorType>(tensorLike));
+ return cast<bufferization::BufferLikeType>(
+ MemRefType::get(memref.getShape(), memref.getElementType(), layout,
+ memref.getMemorySpace()));
+ }
+ return buffer;
+}
+} // namespace
+
::mlir::LogicalResult test::TestDummyTensorOp::bufferize(
::mlir::RewriterBase &rewriter,
const ::mlir::bufferization::BufferizationOptions &options,
@@ -1435,8 +1468,8 @@ TestMultiSlotAlloca::handleDestructuringComplete(
return failure();
const auto outType = getOutput().getType();
- const auto bufferizedOutType = test::TestMemrefType::get(
- getContext(), outType.getShape(), outType.getElementType(), nullptr);
+ const auto bufferizedOutType =
+ convertTensorToBuffer(getOperation(), options, outType);
// replace op with memref analogy
auto dummyMemrefOp = test::TestDummyMemrefOp::create(
rewriter, getLoc(), bufferizedOutType, *buffer);
@@ -1470,13 +1503,12 @@ TestMultiSlotAlloca::handleDestructuringComplete(
mlir::FailureOr<mlir::bufferization::BufferLikeType>
test::TestCreateTensorOp::getBufferType(
- mlir::Value value, const mlir::bufferization::BufferizationOptions &,
+ mlir::Value value, const mlir::bufferization::BufferizationOptions &options,
const mlir::bufferization::BufferizationState &,
llvm::SmallVector<::mlir::Value> &) {
- const auto type = dyn_cast<test::TestTensorType>(value.getType());
+ const auto type = dyn_cast<bufferization::TensorLikeType>(value.getType());
if (type == nullptr)
return failure();
- return cast<mlir::bufferization::BufferLikeType>(test::TestMemrefType::get(
- getContext(), type.getShape(), type.getElementType(), nullptr));
+ return convertTensorToBuffer(getOperation(), options, type);
}
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 6329d61..05a33cf 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -32,6 +32,7 @@ include "mlir/Interfaces/MemorySlotInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/ValueBoundsOpInterface.td"
include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td"
+include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td"
// Include the attribute definitions.
include "TestAttrDefs.td"
@@ -2335,7 +2336,7 @@ def SideEffectWithRegionOp : TEST_Op<"side_effect_with_region_op",
}
//===----------------------------------------------------------------------===//
-// Copy Operation Test
+// Copy Operation Test
//===----------------------------------------------------------------------===//
def CopyOp : TEST_Op<"copy", []> {
@@ -3676,10 +3677,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op",
["bufferize", "bufferizesToMemoryRead",
"bufferizesToMemoryWrite", "getAliasingValues"]>]> {
let arguments = (ins
- Arg<TestTensorType>:$input
+ Arg<Bufferization_TensorLikeTypeInterface>:$input
);
let results = (outs
- Arg<TestTensorType>:$output
+ Arg<Bufferization_TensorLikeTypeInterface>:$output
);
let extraClassDefinition = [{
@@ -3701,10 +3702,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op",
def TestDummyMemrefOp : TEST_Op<"dummy_memref_op", []> {
let arguments = (ins
- Arg<TestMemrefType>:$input
+ Arg<Bufferization_BufferLikeTypeInterface>:$input
);
let results = (outs
- Arg<TestMemrefType>:$output
+ Arg<Bufferization_BufferLikeTypeInterface>:$output
);
}
@@ -3714,7 +3715,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op",
"bufferizesToMemoryWrite", "getAliasingValues",
"bufferizesToAllocation"]>]> {
let arguments = (ins);
- let results = (outs Arg<TestTensorType>:$output);
+ let results = (outs Arg<Bufferization_TensorLikeTypeInterface>:$output);
let extraClassDefinition = [{
bool test::TestCreateTensorOp::bufferizesToMemoryRead(::mlir::OpOperand&,
const ::mlir::bufferization::AnalysisState&) {
@@ -3738,7 +3739,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op",
def TestCreateMemrefOp : TEST_Op<"create_memref_op"> {
let arguments = (ins);
- let results = (outs Arg<TestMemrefType>:$output);
+ let results = (outs Arg<Bufferization_BufferLikeTypeInterface>:$output);
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Pass/TestRemarksPass.cpp b/mlir/test/lib/Pass/TestRemarksPass.cpp
index 3b25686..5ca2d1a 100644
--- a/mlir/test/lib/Pass/TestRemarksPass.cpp
+++ b/mlir/test/lib/Pass/TestRemarksPass.cpp
@@ -43,7 +43,12 @@ public:
<< remark::add("This is a test missed remark")
<< remark::reason("because we are testing the remark pipeline")
<< remark::suggest("try using the remark pipeline feature");
-
+ mlir::remark::passed(
+ loc,
+ remark::RemarkOpts::name("test-remark").category("category-1-passed"))
+ << remark::add("This is a test passed remark (should be dropped)")
+ << remark::reason("because we are testing the remark pipeline")
+ << remark::suggest("try using the remark pipeline feature");
mlir::remark::passed(
loc,
remark::RemarkOpts::name("test-remark").category("category-1-passed"))
diff --git a/mlir/test/mlir-tblgen/cpp-class-comments.td b/mlir/test/mlir-tblgen/cpp-class-comments.td
index a896888..9dcf975 100644
--- a/mlir/test/mlir-tblgen/cpp-class-comments.td
+++ b/mlir/test/mlir-tblgen/cpp-class-comments.td
@@ -96,17 +96,14 @@ def EncodingTrait : AttrInterface<"EncodingTrait"> {
}];
let methods = [
];
-// ATTR-INTERFACE: namespace mlir
-// ATTR-INTERFACE-NEXT: namespace a
-// ATTR-INTERFACE-NEXT: namespace traits
+// ATTR-INTERFACE: namespace mlir::a::traits {
// ATTR-INTERFACE-NEXT: /// Common trait for all layouts.
// ATTR-INTERFACE-NEXT: class EncodingTrait;
}
def SimpleEncodingTrait : AttrInterface<"SimpleEncodingTrait"> {
let cppNamespace = "a::traits";
-// ATTR-INTERFACE: namespace a {
-// ATTR-INTERFACE-NEXT: namespace traits {
+// ATTR-INTERFACE: namespace a::traits {
// ATTR-INTERFACE-NEXT: class SimpleEncodingTrait;
}
@@ -116,8 +113,7 @@ def SimpleOpInterface : OpInterface<"SimpleOpInterface"> {
Simple Op Interface description
}];
-// OP-INTERFACE: namespace a {
-// OP-INTERFACE-NEXT: namespace traits {
+// OP-INTERFACE: namespace a::traits {
// OP-INTERFACE-NEXT: /// Simple Op Interface description
// OP-INTERFACE-NEXT: class SimpleOpInterface;
}
diff --git a/mlir/test/mlir-tblgen/dialect.td b/mlir/test/mlir-tblgen/dialect.td
index f35ce34..9b45495 100644
--- a/mlir/test/mlir-tblgen/dialect.td
+++ b/mlir/test/mlir-tblgen/dialect.td
@@ -62,9 +62,14 @@ def E_SpecialNSOp : Op<E_Dialect, "special_ns_op", []> {
// DEF: ::E::SPECIAL_NS::SpecialNSOp definitions
// DECL-LABEL: GET_OP_CLASSES
+// DECL: namespace a {
// DECL: a::SomeOp declarations
+// DECL: namespace BNS {
// DECL: BNS::SomeOp declarations
+// DECL: namespace C::CC {
// DECL: ::C::CC::SomeOp declarations
// DECL: DSomeOp declarations
+// DECL: namespace ENS {
// DECL: ENS::SomeOp declarations
+// DECL: namespace E::SPECIAL_NS {
// DECL: ::E::SPECIAL_NS::SpecialNSOp declarations