diff options
Diffstat (limited to 'mlir/test')
46 files changed, 1673 insertions, 84 deletions
diff --git a/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir new file mode 100644 index 0000000..808c1c2 --- /dev/null +++ b/mlir/test/Analysis/DataFlow/test-strided-metadata-range-analysis.mlir @@ -0,0 +1,67 @@ +// RUN: mlir-opt -test-strided-metadata-range-analysis %s 2>&1 | FileCheck %s + +func.func @memref_subview(%arg0: memref<8x16x4xf32, strided<[64, 4, 1]>>, %arg1: memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>>, %arg2: memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>>, %arg3: index, %arg4: index, %arg5: index) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %0 = test.with_bounds {smax = 13 : index, smin = 11 : index, umax = 13 : index, umin = 11 : index} : index + %1 = test.with_bounds {smax = 7 : index, smin = 5 : index, umax = 7 : index, umin = 5 : index} : index + + // Test subview with unknown sizes, and constant offsets and strides. + // CHECK: Op: %[[SV0:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [1, 1] signed : [1, 1]}] + // CHECK-SAME: sizes = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + // CHECK-SAME: strides = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [4, 4] signed : [4, 4]}, {unsigned : [1, 1] signed : [1, 1]}] + %subview = memref.subview %arg0[%c0, %c0, %c1] [%arg3, %arg4, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[64, 4, 1]>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + + // Test a subview of a subview, with bounded dynamic offsets. + // CHECK: Op: %[[SV1:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [346, 484] signed : [346, 484]}] + // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}] + // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}] + %subview_0 = memref.subview %subview[%1, %1, %1] [%c2, %c2, %c2] [%0, %0, %0] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + + // Test a subview of a subview, with constant operands. + // CHECK: Op: %[[SV2:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [368, 510] signed : [368, 510]}] + // CHECK-SAME: sizes = [{unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}, {unsigned : [2, 2] signed : [2, 2]}] + // CHECK-SAME: strides = [{unsigned : [704, 832] signed : [704, 832]}, {unsigned : [44, 52] signed : [44, 52]}, {unsigned : [11, 13] signed : [11, 13]}] + %subview_1 = memref.subview %subview_0[%c0, %c0, %c2] [%c2, %c2, %c2] [%c1, %c1, %c1] : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + + // Test a rank-reducing subview. + // CHECK: Op: %[[SV3:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + // CHECK-SAME: sizes = [{unsigned : [64, 64] signed : [64, 64]}, {unsigned : [16, 16] signed : [16, 16]}] + // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + %subview_2 = memref.subview %arg1[%arg4, %arg4, %arg4, %arg4, %arg4] [1, 64, 1, 16, 1] [%arg5, %arg5, %arg5, %arg5, %arg5] : memref<1x128x1x32x1xf32, strided<[4096, 32, 32, 1, 1]>> to memref<64x16xf32, strided<[?, ?], offset: ?>> + + // Test a subview of a rank-reducing subview + // CHECK: Op: %[[SV4:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + // CHECK-SAME: sizes = [{unsigned : [5, 7] signed : [5, 7]}] + // CHECK-SAME: strides = [{unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + %subview_3 = memref.subview %subview_2[%c0, %0] [1, %1] [%c1, %c2] : memref<64x16xf32, strided<[?, ?], offset: ?>> to memref<?xf32, strided<[?], offset: ?>> + + // Test a subview with mixed bounded and unbound dynamic sizes. + // CHECK: Op: %[[SV5:.*]] = memref.subview + // CHECK-NEXT: result[0]: strided_metadata< + // CHECK-SAME: offset = [{unsigned : [32, 32] signed : [32, 32]}] + // CHECK-SAME: sizes = [{unsigned : [11, 13] signed : [11, 13]}, {unsigned : [5, 7] signed : [5, 7]}, {unsigned : [0, 18446744073709551615] signed : [-9223372036854775808, 9223372036854775807]}] + // CHECK-SAME: strides = [{unsigned : [1, 1] signed : [1, 1]}, {unsigned : [64, 64] signed : [64, 64]}, {unsigned : [8, 8] signed : [8, 8]}] + %subview_4 = memref.subview %arg2[%c0, %c0, %c2] [%0, %1, %arg5] [%c1, %c1, %c1] : memref<8x16x4xf32, strided<[1, 64, 8], offset: 16>> to memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>> + return +} + +// CHECK: func.func @memref_subview +// CHECK: %[[A0:.*]]: memref<8x16x4xf32, strided<[64, 4, 1]>> +// CHECK: %[[SV0]] = memref.subview %[[A0]] +// CHECK-NEXT: %[[SV1]] = memref.subview +// CHECK-NEXT: %[[SV2]] = memref.subview +// CHECK-NEXT: %[[SV3]] = memref.subview +// CHECK-NEXT: %[[SV4]] = memref.subview +// CHECK-NEXT: %[[SV5]] = memref.subview diff --git a/mlir/test/Analysis/test-alias-analysis.mlir b/mlir/test/Analysis/test-alias-analysis.mlir index 8cbee61..d71adee 100644 --- a/mlir/test/Analysis/test-alias-analysis.mlir +++ b/mlir/test/Analysis/test-alias-analysis.mlir @@ -256,3 +256,19 @@ func.func @constants(%arg: memref<2xf32>) attributes {test.ptr = "func"} { return } + +// ----- + +// CHECK-LABEL: Testing : "distinct_objects" +// CHECK-DAG: func.region0#0 <-> func.region0#1: MayAlias + +// CHECK-DAG: distinct#0 <-> distinct#1: NoAlias +// CHECK-DAG: distinct#0 <-> func.region0#0: MustAlias +// CHECK-DAG: distinct#1 <-> func.region0#0: MayAlias +// CHECK-DAG: distinct#0 <-> func.region0#1: MayAlias +// CHECK-DAG: distinct#1 <-> func.region0#1: MustAlias + +func.func @distinct_objects(%arg: memref<?xf32>, %arg1: memref<?xf32>) attributes {test.ptr = "func"} { + %0, %1 = memref.distinct_objects %arg, %arg1 {test.ptr = "distinct"} : memref<?xf32>, memref<?xf32> + return +} diff --git a/mlir/test/Conversion/MathToXeVM/lit.local.cfg b/mlir/test/Conversion/MathToXeVM/lit.local.cfg new file mode 100644 index 0000000..cc1ce35 --- /dev/null +++ b/mlir/test/Conversion/MathToXeVM/lit.local.cfg @@ -0,0 +1,7 @@ +spirv_backend_tests = [ + 'native-spirv-builtins.mlir', +] + +# Exclude SPIRV backend tests if SPIRV target is disabled: +if(not config.run_xevm_tests): + config.excludes.update(spirv_backend_tests) diff --git a/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir b/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir new file mode 100644 index 0000000..d76627b --- /dev/null +++ b/mlir/test/Conversion/MathToXeVM/math-to-xevm.mlir @@ -0,0 +1,155 @@ +// RUN: mlir-opt %s -convert-math-to-xevm \ +// RUN: | FileCheck %s -check-prefixes='CHECK,CHECK-ARITH' +// RUN: mlir-opt %s -convert-math-to-xevm='convert-arith=false' \ +// RUN: | FileCheck %s -check-prefixes='CHECK,CHECK-NO-ARITH' + +module @test_module { + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64 + // + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv2_d(vector<2xf64>) -> vector<2xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv3_d(vector<3xf64>) -> vector<3xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv4_d(vector<4xf64>) -> vector<4xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv8_d(vector<8xf64>) -> vector<8xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv16_d(vector<16xf64>) -> vector<16xf64> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv16_f(vector<16xf32>) -> vector<16xf32> + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_expDv4_Dh(vector<4xf16>) -> vector<4xf16> + // + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16 + // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16 + // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32 + // CHECK-DAG: llvm.func @_Z24__spirv_ocl_native_log10d(f64) -> f64 + // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_powrDhDh(f16, f16) -> f16 + // CHECK-DAG: llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16 + // CHECK-DAG: llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32 + // CHECK-DAG: llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64 + // CHECK-ARITH-DAG: llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32 + + // CHECK-LABEL: func @math_ops + func.func @math_ops() { + + %c1_f16 = arith.constant 1. : f16 + %c1_f32 = arith.constant 1. : f32 + %c1_f64 = arith.constant 1. : f64 + + // CHECK: math.exp + %exp_normal_f16 = math.exp %c1_f16 : f16 + // CHECK: math.exp + %exp_normal_f32 = math.exp %c1_f32 : f32 + // CHECK: math.exp + %exp_normal_f64 = math.exp %c1_f64 : f64 + + // Check float operations are converted properly: + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f16) -> f16 + %exp_fast_f16 = math.exp %c1_f16 fastmath<fast> : f16 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f32) -> f32 + %exp_fast_f32 = math.exp %c1_f32 fastmath<fast> : f32 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expd(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f64) -> f64 + %exp_fast_f64 = math.exp %c1_f64 fastmath<fast> : f64 + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + %exp_afn_f16 = math.exp %c1_f16 fastmath<afn> : f16 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + %exp_afn_f32 = math.exp %c1_f32 fastmath<afn> : f32 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expd(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + %exp_afn_f64 = math.exp %c1_f64 fastmath<afn> : f64 + + // CHECK: math.exp + %exp_none_f16 = math.exp %c1_f16 fastmath<none> : f16 + // CHECK: math.exp + %exp_none_f32 = math.exp %c1_f32 fastmath<none> : f32 + // CHECK: math.exp + %exp_none_f64 = math.exp %c1_f64 fastmath<none> : f64 + + // Check vector operations: + + %v2_c1_f64 = arith.constant dense<1.> : vector<2xf64> + %v3_c1_f64 = arith.constant dense<1.> : vector<3xf64> + %v4_c1_f64 = arith.constant dense<1.> : vector<4xf64> + %v8_c1_f64 = arith.constant dense<1.> : vector<8xf64> + %v16_c1_f64 = arith.constant dense<1.> : vector<16xf64> + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv2_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<2xf64>) -> vector<2xf64> + %exp_v2_f64 = math.exp %v2_c1_f64 fastmath<afn> : vector<2xf64> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv3_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<3xf64>) -> vector<3xf64> + %exp_v3_f64 = math.exp %v3_c1_f64 fastmath<afn> : vector<3xf64> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv4_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<4xf64>) -> vector<4xf64> + %exp_v4_f64 = math.exp %v4_c1_f64 fastmath<afn> : vector<4xf64> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv8_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64> + %exp_v8_f64 = math.exp %v8_c1_f64 fastmath<afn> : vector<8xf64> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv16_d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf64>) -> vector<16xf64> + %exp_v16_f64 = math.exp %v16_c1_f64 fastmath<afn> : vector<16xf64> + + %v16_c1_f32 = arith.constant dense<1.> : vector<16xf32> + %v4_c1_f16 = arith.constant dense<1.> : vector<4xf16> + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv16_f(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (vector<16xf32>) -> vector<16xf32> + %exp_v16_f32 = math.exp %v16_c1_f32 fastmath<fast> : vector<16xf32> + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDv4_Dh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (vector<4xf16>) -> vector<4xf16> + %exp_v4_f16 = math.exp %v4_c1_f16 fastmath<fast> : vector<4xf16> + + // Check unsupported vector sizes are not converted: + + %v5_c1_f64 = arith.constant dense<1.> : vector<5xf64> + %v32_c1_f64 = arith.constant dense<1.> : vector<32xf64> + + // CHECK: math.exp + %exp_v5_f64 = math.exp %v5_c1_f64 fastmath<afn> : vector<5xf64> + // CHECK: math.exp + %exp_v32_f64 = math.exp %v32_c1_f64 fastmath<afn> : vector<32xf64> + + // Check fastmath flags propagate properly: + + // CHECK: llvm.call @_Z22__spirv_ocl_native_expDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<fast>} : (f16) -> f16 + %exp_fastmath_all_f16 = math.exp %c1_f16 fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn> : f16 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<nnan, ninf, nsz, arcp, contract, afn>} : (f32) -> f32 + %exp_fastmath_most_f32 = math.exp %c1_f32 fastmath<nnan,ninf,nsz,arcp,contract,afn> : f32 + // CHECK: llvm.call @_Z22__spirv_ocl_native_expf(%{{.*}}) {fastmathFlags = #llvm.fastmath<nnan, afn, reassoc>} : (f32) -> f32 + %exp_afn_reassoc_nnan_f32 = math.exp %c1_f32 fastmath<afn,reassoc,nnan> : f32 + + // Check all other math operations: + + // CHECK: llvm.call @_Z22__spirv_ocl_native_cosDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + %cos_afn_f16 = math.cos %c1_f16 fastmath<afn> : f16 + + // CHECK: llvm.call @_Z23__spirv_ocl_native_exp2f(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + %exp2_afn_f32 = math.exp2 %c1_f32 fastmath<afn> : f32 + + // CHECK: llvm.call @_Z22__spirv_ocl_native_logDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + %log_afn_f16 = math.log %c1_f16 fastmath<afn> : f16 + + // CHECK: llvm.call @_Z23__spirv_ocl_native_log2f(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + %log2_afn_f32 = math.log2 %c1_f32 fastmath<afn> : f32 + + // CHECK: llvm.call @_Z24__spirv_ocl_native_log10d(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + %log10_afn_f64 = math.log10 %c1_f64 fastmath<afn> : f64 + + // CHECK: llvm.call @_Z23__spirv_ocl_native_powrDhDh(%{{.*}}, %{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16, f16) -> f16 + %powr_afn_f16 = math.powf %c1_f16, %c1_f16 fastmath<afn> : f16 + + // CHECK: llvm.call @_Z24__spirv_ocl_native_rsqrtd(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + %rsqrt_afn_f64 = math.rsqrt %c1_f64 fastmath<afn> : f64 + + // CHECK: llvm.call @_Z22__spirv_ocl_native_sinDh(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + %sin_afn_f16 = math.sin %c1_f16 fastmath<afn> : f16 + + // CHECK: llvm.call @_Z23__spirv_ocl_native_sqrtf(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + %sqrt_afn_f32 = math.sqrt %c1_f32 fastmath<afn> : f32 + + // CHECK: llvm.call @_Z22__spirv_ocl_native_tand(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + %tan_afn_f64 = math.tan %c1_f64 fastmath<afn> : f64 + + %c6_9_f32 = arith.constant 6.9 : f32 + %c7_f32 = arith.constant 7. : f32 + + // CHECK-ARITH: llvm.call @_Z25__spirv_ocl_native_divideff(%{{.*}}) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32 + // CHECK-NO-ARITH: arith.divf + %divf_afn_f32 = arith.divf %c6_9_f32, %c7_f32 fastmath<afn> : f32 + + return + } +} diff --git a/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir new file mode 100644 index 0000000..82426c4 --- /dev/null +++ b/mlir/test/Conversion/MathToXeVM/native-spirv-builtins.mlir @@ -0,0 +1,119 @@ +// RUN: mlir-opt %s -gpu-module-to-binary="format=isa" \ +// RUN: -debug-only=serialize-to-isa 2> %t +// RUN: FileCheck --input-file=%t %s +// REQUIRES: asserts +// +// MathToXeVM pass generates OpenCL intrinsics function calls when converting +// Math ops with `fastmath` attr to native function calls. It is assumed that +// the SPIRV backend would correctly convert these intrinsics calls to OpenCL +// ExtInst instructions in SPIRV (See llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp). +// +// To ensure this assumption holds, this test verifies that the SPIRV backend +// behaves as expected. + +module @test_ocl_intrinsics attributes {gpu.container_module} { + gpu.module @kernel [#xevm.target] { + llvm.func spir_kernelcc @native_fcns() attributes {gpu.kernel} { + // CHECK-DAG: %[[F16T:.+]] = OpTypeFloat 16 + // CHECK-DAG: %[[ZERO_F16:.+]] = OpConstantNull %[[F16T]] + %c0_f16 = llvm.mlir.constant(0. : f16) : f16 + // CHECK-DAG: %[[F32T:.+]] = OpTypeFloat 32 + // CHECK-DAG: %[[ZERO_F32:.+]] = OpConstantNull %[[F32T]] + %c0_f32 = llvm.mlir.constant(0. : f32) : f32 + // CHECK-DAG: %[[F64T:.+]] = OpTypeFloat 64 + // CHECK-DAG: %[[ZERO_F64:.+]] = OpConstantNull %[[F64T]] + %c0_f64 = llvm.mlir.constant(0. : f64) : f64 + + // CHECK-DAG: %[[V2F64T:.+]] = OpTypeVector %[[F64T]] 2 + // CHECK-DAG: %[[V2_ZERO_F64:.+]] = OpConstantNull %[[V2F64T]] + %v2_c0_f64 = llvm.mlir.constant(dense<0.> : vector<2xf64>) : vector<2xf64> + // CHECK-DAG: %[[V3F32T:.+]] = OpTypeVector %[[F32T]] 3 + // CHECK-DAG: %[[V3_ZERO_F32:.+]] = OpConstantNull %[[V3F32T]] + %v3_c0_f32 = llvm.mlir.constant(dense<0.> : vector<3xf32>) : vector<3xf32> + // CHECK-DAG: %[[V4F64T:.+]] = OpTypeVector %[[F64T]] 4 + // CHECK-DAG: %[[V4_ZERO_F64:.+]] = OpConstantNull %[[V4F64T]] + %v4_c0_f64 = llvm.mlir.constant(dense<0.> : vector<4xf64>) : vector<4xf64> + // CHECK-DAG: %[[V8F64T:.+]] = OpTypeVector %[[F64T]] 8 + // CHECK-DAG: %[[V8_ZERO_F64:.+]] = OpConstantNull %[[V8F64T]] + %v8_c0_f64 = llvm.mlir.constant(dense<0.> : vector<8xf64>) : vector<8xf64> + // CHECK-DAG: %[[V16F16T:.+]] = OpTypeVector %[[F16T]] 16 + // CHECK-DAG: %[[V16_ZERO_F16:.+]] = OpConstantNull %[[V16F16T]] + %v16_c0_f16 = llvm.mlir.constant(dense<0.> : vector<16xf16>) : vector<16xf16> + + // CHECK: OpExtInst %[[F16T]] %{{.+}} native_exp %[[ZERO_F16]] + %exp_f16 = llvm.call @_Z22__spirv_ocl_native_expDh(%c0_f16) : (f16) -> f16 + // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp %[[ZERO_F32]] + %exp_f32 = llvm.call @_Z22__spirv_ocl_native_expf(%c0_f32) : (f32) -> f32 + // CHECK: OpExtInst %[[F64T]] %{{.+}} native_exp %[[ZERO_F64]] + %exp_f64 = llvm.call @_Z22__spirv_ocl_native_expd(%c0_f64) : (f64) -> f64 + + // CHECK: OpExtInst %[[V2F64T]] %{{.+}} native_exp %[[V2_ZERO_F64]] + %exp_v2_f64 = llvm.call @_Z22__spirv_ocl_native_expDv2_f64(%v2_c0_f64) : (vector<2xf64>) -> vector<2xf64> + // CHECK: OpExtInst %[[V3F32T]] %{{.+}} native_exp %[[V3_ZERO_F32]] + %exp_v3_f32 = llvm.call @_Z22__spirv_ocl_native_expDv3_f32(%v3_c0_f32) : (vector<3xf32>) -> vector<3xf32> + // CHECK: OpExtInst %[[V4F64T]] %{{.+}} native_exp %[[V4_ZERO_F64]] + %exp_v4_f64 = llvm.call @_Z22__spirv_ocl_native_expDv4_f64(%v4_c0_f64) : (vector<4xf64>) -> vector<4xf64> + // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_exp %[[V8_ZERO_F64]] + %exp_v8_f64 = llvm.call @_Z22__spirv_ocl_native_expDv8_f64(%v8_c0_f64) : (vector<8xf64>) -> vector<8xf64> + // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_exp %[[V16_ZERO_F16]] + %exp_v16_f16 = llvm.call @_Z22__spirv_ocl_native_expDv16_f16(%v16_c0_f16) : (vector<16xf16>) -> vector<16xf16> + + // SPIRV backend does not currently handle fastmath flags: The SPIRV + // backend would need to generate OpDecorate calls to decorate math ops + // with FPFastMathMode/FPFastMathModeINTEL decorations. + // + // FIXME: When support for fastmath flags in the SPIRV backend is added, + // add tests here to ensure fastmath flags are converted to the correct + // OpDecorate calls. + // + // See: + // - https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_math_extended_instructions + // - https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpDecorate + + // CHECK: OpExtInst %[[F16T]] %{{.+}} native_cos %[[ZERO_F16]] + %cos_afn_f16 = llvm.call @_Z22__spirv_ocl_native_cosDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + // CHECK: OpExtInst %[[F32T]] %{{.+}} native_exp2 %[[ZERO_F32]] + %exp2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_exp2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + // CHECK: OpExtInst %[[F16T]] %{{.+}} native_log %[[ZERO_F16]] + %log_afn_f16 = llvm.call @_Z22__spirv_ocl_native_logDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + // CHECK: OpExtInst %[[F32T]] %{{.+}} native_log2 %[[ZERO_F32]] + %log2_afn_f32 = llvm.call @_Z23__spirv_ocl_native_log2f(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + // CHECK: OpExtInst %[[V8F64T]] %{{.+}} native_log10 %[[V8_ZERO_F64]] + %log10_afn_f64 = llvm.call @_Z24__spirv_ocl_native_log10Dv8_d(%v8_c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (vector<8xf64>) -> vector<8xf64> + // CHECK: OpExtInst %[[V16F16T]] %{{.+}} native_powr %[[V16_ZERO_F16]] %[[V16_ZERO_F16]] + %powr_afn_f16 = llvm.call @_Z23__spirv_ocl_native_powrDv16_DhS_(%v16_c0_f16, %v16_c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (vector<16xf16>, vector<16xf16>) -> vector<16xf16> + // CHECK: OpExtInst %[[F64T]] %{{.+}} native_rsqrt %[[ZERO_F64]] + %rsqrt_afn_f64 = llvm.call @_Z24__spirv_ocl_native_rsqrtd(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + // CHECK: OpExtInst %[[F16T]] %{{.+}} native_sin %[[ZERO_F16]] + %sin_afn_f16 = llvm.call @_Z22__spirv_ocl_native_sinDh(%c0_f16) {fastmathFlags = #llvm.fastmath<afn>} : (f16) -> f16 + // CHECK: OpExtInst %[[F32T]] %{{.+}} native_sqrt %[[ZERO_F32]] + %sqrt_afn_f32 = llvm.call @_Z23__spirv_ocl_native_sqrtf(%c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32) -> f32 + // CHECK: OpExtInst %[[F64T]] %{{.+}} native_tan %[[ZERO_F64]] + %tan_afn_f64 = llvm.call @_Z22__spirv_ocl_native_tand(%c0_f64) {fastmathFlags = #llvm.fastmath<afn>} : (f64) -> f64 + // CHECK: OpExtInst %[[F32T]] %{{.+}} native_divide %[[ZERO_F32]] %[[ZERO_F32]] + %divide_afn_f32 = llvm.call @_Z25__spirv_ocl_native_divideff(%c0_f32, %c0_f32) {fastmathFlags = #llvm.fastmath<afn>} : (f32, f32) -> f32 + + llvm.return + } + + llvm.func @_Z22__spirv_ocl_native_expDh(f16) -> f16 + llvm.func @_Z22__spirv_ocl_native_expf(f32) -> f32 + llvm.func @_Z22__spirv_ocl_native_expd(f64) -> f64 + llvm.func @_Z22__spirv_ocl_native_expDv2_f64(vector<2xf64>) -> vector<2xf64> + llvm.func @_Z22__spirv_ocl_native_expDv3_f32(vector<3xf32>) -> vector<3xf32> + llvm.func @_Z22__spirv_ocl_native_expDv4_f64(vector<4xf64>) -> vector<4xf64> + llvm.func @_Z22__spirv_ocl_native_expDv8_f64(vector<8xf64>) -> vector<8xf64> + llvm.func @_Z22__spirv_ocl_native_expDv16_f16(vector<16xf16>) -> vector<16xf16> + llvm.func @_Z22__spirv_ocl_native_cosDh(f16) -> f16 + llvm.func @_Z23__spirv_ocl_native_exp2f(f32) -> f32 + llvm.func @_Z22__spirv_ocl_native_logDh(f16) -> f16 + llvm.func @_Z23__spirv_ocl_native_log2f(f32) -> f32 + llvm.func @_Z24__spirv_ocl_native_log10Dv8_d(vector<8xf64>) -> vector<8xf64> + llvm.func @_Z23__spirv_ocl_native_powrDv16_DhS_(vector<16xf16>, vector<16xf16>) -> vector<16xf16> + llvm.func @_Z24__spirv_ocl_native_rsqrtd(f64) -> f64 + llvm.func @_Z22__spirv_ocl_native_sinDh(f16) -> f16 + llvm.func @_Z23__spirv_ocl_native_sqrtf(f32) -> f32 + llvm.func @_Z22__spirv_ocl_native_tand(f64) -> f64 + llvm.func @_Z25__spirv_ocl_native_divideff(f32, f32) -> f32 + } +} diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index a7a73ae..780c25a 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -1538,6 +1538,92 @@ func.func @unsupportedRescaleInexactRound(%arg0 : tensor<2xi8>) -> (tensor<2xi8> // ----- +// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()> +// CHECK-LABEL: @rescale_no_const +// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]] +func.func @rescale_no_const(%arg0 : tensor<2xi8>, %multiplier : tensor<1xi32>, %shift : tensor<1xi8>, %input_zp : tensor<1xi8>, %output_zp : tensor<1xi8>) -> (tensor<2xi8>) { + // CHECK: [[MULTIPLIER:%.+]] = tensor.collapse_shape %arg1 [] : tensor<1xi32> into tensor<i32> + // CHECK: [[SHIFT:%.+]] = tensor.collapse_shape %arg2 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xi8> + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[MULTIPLIER]], [[SHIFT]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<i32>, tensor<i8>, tensor<i8>, tensor<i8>) outs([[INIT]] : tensor<2xi8>) { + // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: i8, [[ARG4:%.*]]: i8, [[OUT:%.*]]: i8): + // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extsi [[ARG3]] : i8 to i32 + // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extsi [[ARG4]] : i8 to i32 + // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32 + // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32 + // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32 + // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32 + // CHECK: %c-128_i32 = arith.constant -128 : i32 + // CHECK: %c127_i32 = arith.constant 127 : i32 + // CHECK: [[MAX:%.+]] = arith.maxsi %c-128_i32, [[TMP3]] : i32 + // CHECK: [[MIN:%.+]] = arith.minsi %c127_i32, [[MAX]] : i32 + %0 = tosa.rescale %arg0, %multiplier, %shift, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = false, input_unsigned = false, output_unsigned = false} : (tensor<2xi8>, tensor<1xi32>, tensor<1xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8> + return %0 : tensor<2xi8> +} + +// ----- + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()> +// CHECK-LABEL: @rescale_no_const_per_channel +// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]] +// CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]] +// CHECK-SAME: [[ARG2:%[0-9a-zA-Z_]*]] +func.func @rescale_no_const_per_channel(%arg0 : tensor<2xi8>, %arg1 : tensor<2xi32>, %arg2 : tensor<2xi8>, %input_zp : tensor<1xi8>, %output_zp : tensor<1xi8>) -> (tensor<2xi8>) { + // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xi8> into tensor<i8> + // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xi8> + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[ARG1]], [[ARG2]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<i8>, tensor<i8>) outs([[INIT]] : tensor<2xi8>) { + // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: i8, [[ARG4:%.*]]: i8, [[OUT:%.*]]: i8): + // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extsi [[ARG3]] : i8 to i32 + // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extsi [[ARG4]] : i8 to i32 + // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32 + // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32 + // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32 + // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32 + // CHECK: %c-128_i32 = arith.constant -128 : i32 + // CHECK: %c127_i32 = arith.constant 127 : i32 + // CHECK: [[MAX:%.+]] = arith.maxsi %c-128_i32, [[TMP3]] : i32 + // CHECK: [[MIN:%.+]] = arith.minsi %c127_i32, [[MAX]] : i32 + %0 = tosa.rescale %arg0, %arg1, %arg2, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = true, input_unsigned = false, output_unsigned = false} : (tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2xi8> + return %0 : tensor<2xi8> +} + +// ----- + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0) -> (d0)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0) -> ()> +// CHECK-LABEL: @rescale_no_const_per_channel_input_output_zp_ui8 +// CHECK-SAME: ([[ARG0:%[0-9a-zA-Z_]*]] +// CHECK-SAME: [[ARG1:%[0-9a-zA-Z_]*]] +// CHECK-SAME: [[ARG2:%[0-9a-zA-Z_]*]] +func.func @rescale_no_const_per_channel_input_output_zp_ui8(%arg0 : tensor<2xi8>, %arg1 : tensor<2xi32>, %arg2 : tensor<2xi8>, %input_zp : tensor<1xui8>, %output_zp : tensor<1xui8>) -> (tensor<2xui8>) { + // CHECK: [[INPUT_ZP:%.+]] = tensor.collapse_shape %arg3 [] : tensor<1xui8> into tensor<ui8> + // CHECK: [[OUTPUT_ZP:%.+]] = tensor.collapse_shape %arg4 [] : tensor<1xui8> into tensor<ui8> + // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xui8> + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP1]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins([[ARG0]], [[ARG1]], [[ARG2]], [[INPUT_ZP]], [[OUTPUT_ZP]] : tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<ui8>, tensor<ui8>) outs([[INIT]] : tensor<2xui8>) { + // CHECK: ^bb0([[ARG0:%.*]]: i8, [[ARG1:%.*]]: i32, [[ARG2:%.*]]: i8, [[ARG3:%.*]]: ui8, [[ARG4:%.*]]: ui8, [[OUT:%.*]]: ui8): + // CHECK: [[INPUT_ZP_I8:%.+]] = builtin.unrealized_conversion_cast [[ARG3]] : ui8 to i8 + // CHECK: [[INPUT_ZP_I32:%.+]] = arith.extui [[INPUT_ZP_I8]] : i8 to i32 + // CHECK: [[OUTPUT_ZP_I8:%.+]] = builtin.unrealized_conversion_cast [[ARG4]] : ui8 to i8 + // CHECK: [[OUTPUT_ZP_I32:%.+]] = arith.extui [[OUTPUT_ZP_I8]] : i8 to i32 + // CHECK: [[ARG0_I32:%.+]] = arith.extsi [[ARG0]] : i8 to i32 + // CHECK: [[TMP1:%.+]] = arith.subi [[ARG0_I32]], [[INPUT_ZP_I32]] : i32 + // CHECK: [[TMP2:%.+]] = tosa.apply_scale [[TMP1]], [[ARG1]], [[ARG2]] {rounding_mode = DOUBLE_ROUND} : (i32, i32, i8) -> i32 + // CHECK: [[TMP3:%.+]] = arith.addi [[TMP2]], [[OUTPUT_ZP_I32]] : i32 + // CHECK: %c0_i32 = arith.constant 0 : i32 + // CHECK: %c255_i32 = arith.constant 255 : i32 + // CHECK: [[MAX:%.+]] = arith.maxsi %c0_i32, [[TMP3]] : i32 + // CHECK: [[MIN:%.+]] = arith.minsi %c255_i32, [[MAX]] : i32 + %0 = tosa.rescale %arg0, %arg1, %arg2, %input_zp, %output_zp {scale32 = true, rounding_mode = DOUBLE_ROUND, per_channel = true, input_unsigned = false, output_unsigned = true} : (tensor<2xi8>, tensor<2xi32>, tensor<2xi8>, tensor<1xui8>, tensor<1xui8>) -> tensor<2xui8> + return %0 : tensor<2xui8> +} + +// ----- + // CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: @reverse diff --git a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir index 0b150e9..9c552d8 100644 --- a/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir +++ b/mlir/test/Conversion/XeGPUToXeVM/loadstoreprefetch.mlir @@ -14,19 +14,36 @@ gpu.func @load_gather_i64_src_value_offset(%src: i64, %offset: vector<1xindex>) // CHECK: %[[VAR4:.*]] = arith.addi %[[ARG0]], %[[VAR3]] : i64 // CHECK: %[[VAR5:.*]] = llvm.inttoptr %[[VAR4]] : i64 to !llvm.ptr<1> // CHECK: %[[VAR6:.*]] = scf.if %[[VAR2]] -> (f16) { - // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> vector<1xf16> - // CHECK: %[[VAR8:.*]] = vector.extract %[[VAR7]][0] : f16 from vector<1xf16> - // CHECK: scf.yield %[[VAR8]] : f16 - // CHECK: } else { - // CHECK: %[[CST_0:.*]] = arith.constant dense<0.000000e+00> : vector<1xf16> - // CHECK: %[[VAR7:.*]] = vector.extract %[[CST_0]][0] : f16 from vector<1xf16> + // CHECK: %[[VAR7:.*]] = llvm.load %[[VAR5]] {cache_control = #xevm.load_cache_control<L1c_L2uc_L3uc>} : !llvm.ptr<1> -> f16 // CHECK: scf.yield %[[VAR7]] : f16 + // CHECK: } else { + // CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f16 + // CHECK: scf.yield %[[CST_0]] : f16 // CHECK: } %3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : i64, vector<1xindex>, vector<1xi1> -> vector<1xf16> gpu.return } } + +// ----- +gpu.module @test { +// CHECK-LABEL: @source_materialize_single_elem_vec +// CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: vector<1xindex>, %[[ARG2:.*]]: memref<1xf16> +gpu.func @source_materialize_single_elem_vec(%src: i64, %offset: vector<1xindex>, %dst: memref<1xf16>) { + %1 = arith.constant dense<1>: vector<1xi1> + %3 = xegpu.load %src[%offset], %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> + : i64, vector<1xindex>, vector<1xi1> -> vector<1xf16> + // CHECK: %[[VAR_IF:.*]] = scf.if + // CHECK: %[[VAR_RET:.*]] = vector.broadcast %[[VAR_IF]] : f16 to vector<1xf16> + // CHECK: %[[C0:.*]] = arith.constant 0 : index + // CHECK: vector.store %[[VAR_RET]], %[[ARG2]][%[[C0]]] : memref<1xf16>, vector<1xf16> + %c0 = arith.constant 0 : index + vector.store %3, %dst[%c0] : memref<1xf16>, vector<1xf16> + gpu.return +} +} + // ----- gpu.module @test { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir index c58b153..21b508e 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -65,13 +65,13 @@ func.func @main(%t: tensor<?xf32>, %sz: index, %idx: index) -> (f32, f32) { // ----- -func.func @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> { +func.func private @return_arg(%A: tensor<?xf32>) -> tensor<?xf32> { func.return %A : tensor<?xf32> } -// CHECK-LABEL: func @return_arg +// CHECK-LABEL: func private @return_arg // CHECK-SAME: %[[A:.*]]: memref<?xf32 // CHECK-NOT: return %[[A]] -// NO-DROP-LABEL: func @return_arg +// NO-DROP-LABEL: func private @return_arg // NO-DROP-SAME: %[[A:.*]]: memref<?xf32 // NO-DROP: return %[[A]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index 6054a61..d5f834b 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -171,9 +171,9 @@ func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { // Bufferization of a function that is reading and writing. %t0 is writable, so // no copy should be inserted. -// CHECK-LABEL: func @inner_func( +// CHECK-LABEL: func private @inner_func( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { +func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { // CHECK-NOT: copy %f = arith.constant 1.0 : f32 %c0 = arith.constant 0 : index @@ -186,9 +186,9 @@ func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { return %0, %1 : tensor<?xf32>, f32 } -// CHECK-LABEL: func @call_func_with_non_tensor_return( +// CHECK-LABEL: func private @call_func_with_non_tensor_return( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @call_func_with_non_tensor_return( +func.func private @call_func_with_non_tensor_return( %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) { // CHECK-NOT: alloc // CHECK-NOT: copy @@ -203,9 +203,9 @@ func.func @call_func_with_non_tensor_return( // Bufferization of a function that is reading and writing. %t0 is not writable, // so a copy is needed. -// CHECK-LABEL: func @inner_func( +// CHECK-LABEL: func private @inner_func( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { +func.func private @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { // CHECK-NOT: copy %f = arith.constant 1.0 : f32 %c0 = arith.constant 0 : index @@ -276,10 +276,10 @@ func.func @main(%t: tensor<?xf32> {bufferization.writable = false}) -> (f32) { // This function does not read, just write. We need an alloc, but no copy. -// CHECK-LABEL: func @does_not_read( +// CHECK-LABEL: func private @does_not_read( // CHECK-NOT: alloc // CHECK-NOT: copy -func.func @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> { +func.func private @does_not_read(%t: tensor<?xf32>) -> tensor<?xf32> { %f0 = arith.constant 0.0 : f32 %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<?xf32>) -> tensor<?xf32> return %r : tensor<?xf32> @@ -354,9 +354,9 @@ func.func @main() { // A write inside an scf.execute_region. An equivalent tensor is yielded. -// CHECK-LABEL: func @execute_region_test( +// CHECK-LABEL: func private @execute_region_test( // CHECK-SAME: %[[m1:.*]]: memref<?xf32 -func.func @execute_region_test(%t1 : tensor<?xf32>) +func.func private @execute_region_test(%t1 : tensor<?xf32>) -> (f32, tensor<?xf32>, f32) { %f1 = arith.constant 0.0 : f32 @@ -397,11 +397,11 @@ func.func @no_inline_execute_region_not_canonicalized() { // CHECK: func private @some_external_func(memref<?xf32, strided<[?], offset: ?>>) func.func private @some_external_func(tensor<?xf32>) -// CHECK: func @scf_for_with_tensor_insert_slice( +// CHECK: func private @scf_for_with_tensor_insert_slice( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>> -func.func @scf_for_with_tensor_insert_slice( +func.func private @scf_for_with_tensor_insert_slice( %A : tensor<?xf32>, %B : tensor<?xf32>, %C : tensor<4xf32>, %lb : index, %ub : index, %step : index) -> (tensor<?xf32>, tensor<?xf32>) @@ -456,11 +456,11 @@ func.func @bar( // ----- -// CHECK: func @init_and_dot( +// CHECK: func private @init_and_dot( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<64xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<f32, strided<[], offset: ?>> -func.func @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> { +func.func private @init_and_dot(%a: tensor<64xf32>, %b: tensor<64xf32>, %c: tensor<f32>) -> tensor<f32> { // CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32 %v0 = arith.constant 0.0 : f32 @@ -574,9 +574,9 @@ func.func @entry(%A : tensor<?xf32> {bufferization.buffer_layout = affine_map<(i // No alloc or copy inside of the loop. -// CHECK-LABEL: func @inner_func( +// CHECK-LABEL: func private @inner_func( // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 -func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> { +func.func private @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> { %f = arith.constant 1.0 : f32 %c0 = arith.constant 0 : index // CHECK: memref.store %{{.*}}, %[[arg0]] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir index e2ab876..b52612d 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir @@ -24,10 +24,46 @@ // CHECK-NOT: copy // CHECK: %[[call:.*]]:2 = call @inner_func(%[[arg0]]) %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32) - // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32,{{.*}}> + // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32{{.*}}> return %1, %0 : f32, tensor<?xf32> } "test.finish" () : () -> () }) : () -> () +// ----- +#enc1 = #test.tensor_encoding<"hello"> +#enc2 = #test.tensor_encoding<"not hello"> + +"test.symbol_scope_isolated"() ({ + // CHECK: func @inner_func( + // CHECK-SAME: %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>) + // CHECK-SAME: -> memref<?xf32, #test.memref_layout<"hello">> + func.func @inner_func(%t: tensor<?xf32, #enc1>) + -> tensor<?xf32, #enc1> { + // CHECK: return %[[arg0]] + return %t : tensor<?xf32, #enc1> + } + + // CHECK: func @outer_func( + // CHECK-SAME: %[[arg0:.*]]: memref<?xf32, #test.memref_layout<"hello">>) + // CHECK-SAME: -> (memref<?xf32, #test.memref_layout<"hello">>, + // CHECK-SAME: memref<?xf32, #test.memref_layout<"not hello">>) + func.func @outer_func(%t0: tensor<?xf32, #enc1>) + -> (tensor<?xf32, #enc1>, tensor<?xf32, #enc2>) { + // CHECK: %[[call:.*]] = call @inner_func(%[[arg0]]) + %0 = call @inner_func(%t0) + : (tensor<?xf32, #enc1>) -> (tensor<?xf32, #enc1>) + + // CHECK: %[[local:.*]] = "test.create_memref_op"() : () + // CHECK-SAME: -> memref<?xf32, #test.memref_layout<"not hello">> + %local = "test.create_tensor_op"() : () -> tensor<?xf32, #enc2> + // CHECK: %[[dummy:.*]] = "test.dummy_memref_op"(%[[local]]) + %1 = "test.dummy_tensor_op"(%local) : (tensor<?xf32, #enc2>) + -> tensor<?xf32, #enc2> + + // CHECK: return %[[call]], %[[dummy]] + return %0, %1 : tensor<?xf32, #enc1>, tensor<?xf32, #enc2> + } + "test.finish" () : () -> () +}) : () -> () diff --git a/mlir/test/Dialect/LLVMIR/bytecode.mlir b/mlir/test/Dialect/LLVMIR/bytecode.mlir new file mode 100644 index 0000000..821b0ac --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/bytecode.mlir @@ -0,0 +1,35 @@ +// RUN: mlir-opt -verify-roundtrip %s + +#access_group = #llvm.access_group<id = distinct[0]<>> +#access_group1 = #llvm.access_group<id = distinct[1]<>> +#di_subprogram = #llvm.di_subprogram<recId = distinct[2]<>> +#loc1 = loc("test.f90":12:14) +#loc2 = loc("test":4:3) +#loc6 = loc(fused<#di_subprogram>[#loc1]) +#loc7 = loc(fused<#di_subprogram>[#loc2]) +#loop_annotation = #llvm.loop_annotation<disableNonforced = false, mustProgress = true, startLoc = #loc6, endLoc = #loc7, parallelAccesses = #access_group, #access_group1> +module { + llvm.func @imp_fn() { + llvm.return loc(#loc2) + } loc(#loc8) + llvm.func @loop_annotation_with_locs() { + llvm.br ^bb1 {loop_annotation = #loop_annotation} loc(#loc4) + ^bb1: // pred: ^bb0 + llvm.return loc(#loc5) + } loc(#loc3) +} loc(#loc) +#di_file = #llvm.di_file<"test.f90" in ""> +#di_subroutine_type = #llvm.di_subroutine_type<callingConvention = DW_CC_program> +#loc = loc("test":0:0) +#loc3 = loc("test-path":36:3) +#loc4 = loc("test-path":37:5) +#loc5 = loc("test-path":39:5) +#di_compile_unit = #llvm.di_compile_unit<id = distinct[3]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full> +#di_compile_unit1 = #llvm.di_compile_unit<id = distinct[4]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full> +#di_compile_unit2 = #llvm.di_compile_unit<id = distinct[5]<>, sourceLanguage = DW_LANG_Fortran95, file = #di_file, isOptimized = false, emissionKind = Full> +#di_module = #llvm.di_module<file = #di_file, scope = #di_compile_unit1, name = "mod1"> +#di_module1 = #llvm.di_module<file = #di_file, scope = #di_compile_unit2, name = "mod2"> +#di_imported_entity = #llvm.di_imported_entity<tag = DW_TAG_imported_module, scope = #di_subprogram, entity = #di_module, file = #di_file, line = 1> +#di_imported_entity1 = #llvm.di_imported_entity<tag = DW_TAG_imported_module, scope = #di_subprogram, entity = #di_module1, file = #di_file, line = 1> +#di_subprogram1 = #llvm.di_subprogram<recId = distinct[2]<>, id = distinct[6]<>, compileUnit = #di_compile_unit, scope = #di_file, name = "imp_fn", file = #di_file, subprogramFlags = Definition, type = #di_subroutine_type, retainedNodes = #di_imported_entity, #di_imported_entity1> +#loc8 = loc(fused<#di_subprogram1>[#loc1]) diff --git a/mlir/test/Dialect/LLVMIR/debuginfo.mlir b/mlir/test/Dialect/LLVMIR/debuginfo.mlir index 1834b0a..d7bf99b 100644 --- a/mlir/test/Dialect/LLVMIR/debuginfo.mlir +++ b/mlir/test/Dialect/LLVMIR/debuginfo.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt -emit-bytecode %s | mlir-opt | FileCheck %s // CHECK-DAG: #[[FILE:.*]] = #llvm.di_file<"debuginfo.mlir" in "/test/"> #file = #llvm.di_file<"debuginfo.mlir" in "/test/"> diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index 7344797..00e763a 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt -verify-roundtrip %s // CHECK-LABEL: func @baz @@ -757,7 +757,7 @@ llvm.func @stackrestore(%arg0: !llvm.ptr) { // CHECK-LABEL: @experimental_noalias_scope_decl llvm.func @experimental_noalias_scope_decl() { - // CHECK: llvm.intr.experimental.noalias.scope.decl #{{.*}} + // CHECK: llvm.intr.experimental.noalias.scope.decl #alias_scope{{.*}} llvm.intr.experimental.noalias.scope.decl #alias_scope llvm.return } @@ -767,7 +767,7 @@ llvm.func @experimental_noalias_scope_decl() { // CHECK-LABEL: @experimental_noalias_scope_with_string_id llvm.func @experimental_noalias_scope_with_string_id() { - // CHECK: llvm.intr.experimental.noalias.scope.decl #{{.*}} + // CHECK: llvm.intr.experimental.noalias.scope.decl #alias_scope{{.*}} llvm.intr.experimental.noalias.scope.decl #alias_scope2 llvm.return } diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir index 618ba34..66cae5c 100644 --- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir +++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir @@ -1011,6 +1011,20 @@ module attributes { transform.target_tag = "start_here" } { } -> tensor<1x1x4xf32> return } + + func.func @generic_none(%arg0: tensor<128x128xi32>, %arg1: tensor<128x128xi32>, %arg2: tensor<128x128xi32>) { + %0 = linalg.generic { + indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, + affine_map<(d0, d1, d2) -> (d2, d1)>, + affine_map<(d0, d1, d2) -> (d0, d1)>], + iterator_types = ["parallel", "parallel", "reduction"]} + ins(%arg0, %arg1 : tensor<128x128xi32>, tensor<128x128xi32>) + outs(%arg2 : tensor<128x128xi32>) { + ^bb0(%in: i32, %in_0: i32, %out: i32): + linalg.yield %out : i32 + } -> tensor<128x128xi32> + return + } } // ----- diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 9616a3e..1df15e8 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -10,10 +10,10 @@ // TODO: Some test cases from this file should be moved to other dialects. -// CHECK-LABEL: func @fill_inplace( +// CHECK-LABEL: func private @fill_inplace( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> -// CHECK-NO-LAYOUT-MAP-LABEL: func @fill_inplace(%{{.*}}: memref<?xf32>) { -func.func @fill_inplace( +// CHECK-NO-LAYOUT-MAP-LABEL: func private @fill_inplace(%{{.*}}: memref<?xf32>) { +func.func private @fill_inplace( %A : tensor<?xf32> {bufferization.writable = true}) -> tensor<?xf32> { @@ -56,10 +56,10 @@ func.func @not_inplace( // ----- -// CHECK-LABEL: func @not_inplace +// CHECK-LABEL: func private @not_inplace // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?x?xf32, strided<[?, ?], offset: ?>>) { -// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?x?xf32>) { -func.func @not_inplace( +// CHECK-NO-LAYOUT-MAP-LABEL: func private @not_inplace(%{{.*}}: memref<?x?xf32>) { +func.func private @not_inplace( %A : tensor<?x?xf32> {bufferization.writable = true}) -> tensor<?x?xf32> { @@ -235,7 +235,7 @@ func.func @dominance_violation_bug_1( // ----- -func.func @gather_like( +func.func private @gather_like( %arg0 : tensor<?x?xf32> {bufferization.writable = false}, %arg1 : tensor<?xi32> {bufferization.writable = false}, %arg2 : tensor<?x?xf32> {bufferization.writable = true}) @@ -254,7 +254,7 @@ func.func @gather_like( } -> tensor<?x?xf32> return %0 : tensor<?x?xf32> } -// CHECK-LABEL: func @gather_like( +// CHECK-LABEL: func private @gather_like( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?xf32, // CHECK-SAME: %[[ARG1:.+]]: memref<?xi32 // CHECK-SAME: %[[ARG2:.+]]: memref<?x?xf32 diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir new file mode 100644 index 0000000..35355c6 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/recipe-populate-firstprivate.mlir @@ -0,0 +1,102 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=firstprivate})" | FileCheck %s + +// CHECK: acc.firstprivate.recipe @firstprivate_scalar : memref<f32> init { +// CHECK: ^bb0(%{{.*}}: memref<f32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32> +// CHECK: acc.yield %[[ALLOC]] : memref<f32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<f32>, %[[DST:.*]]: memref<f32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<f32> to memref<f32> +// CHECK: acc.terminator +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar() { + %0 = memref.alloca() {test.var = "scalar"} : memref<f32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_static_2d : memref<10x20xf32> init { +// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<10x20xf32>, %[[DST:.*]]: memref<10x20xf32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x20xf32> to memref<10x20xf32> +// CHECK: acc.terminator +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_static_2d() { + %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_dynamic_2d : memref<?x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>): +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32> +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<?x?xf32>, %[[DST:.*]]: memref<?x?xf32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<?x?xf32> to memref<?x?xf32> +// CHECK: acc.terminator +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_dynamic_2d(%arg0: index, %arg1: index) { + %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_mixed_dims : memref<10x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>): +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<10x?xf32>, %[[DST:.*]]: memref<10x?xf32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<10x?xf32> to memref<10x?xf32> +// CHECK: acc.terminator +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_mixed_dims(%arg0: index) { + %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32> + return +} + +// ----- + +// CHECK: acc.firstprivate.recipe @firstprivate_scalar_int : memref<i32> init { +// CHECK: ^bb0(%{{.*}}: memref<i32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32> +// CHECK: acc.yield %[[ALLOC]] : memref<i32> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: memref<i32>, %[[DST:.*]]: memref<i32>): +// CHECK: memref.copy %[[SRC]], %[[DST]] : memref<i32> to memref<i32> +// CHECK: acc.terminator +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar_int() { + %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32> + return +} + diff --git a/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir new file mode 100644 index 0000000..8403ee8 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/recipe-populate-private.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=private})" | FileCheck %s + +// CHECK: acc.private.recipe @private_scalar : memref<f32> init { +// CHECK: ^bb0(%{{.*}}: memref<f32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<f32> +// CHECK: acc.yield %[[ALLOC]] : memref<f32> +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar() { + %0 = memref.alloca() {test.var = "scalar"} : memref<f32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_static_2d : memref<10x20xf32> init { +// CHECK: ^bb0(%{{.*}}: memref<10x20xf32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<10x20xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x20xf32> +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_static_2d() { + %0 = memref.alloca() {test.var = "static_2d"} : memref<10x20xf32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_dynamic_2d : memref<?x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<?x?xf32>): +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[DIM0:.*]] = memref.dim %[[ARG]], %[[C0]] : memref<?x?xf32> +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<?x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<?x?xf32> +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<?x?xf32>, %[[VAL:.*]]: memref<?x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<?x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_dynamic_2d(%arg0: index, %arg1: index) { + %0 = memref.alloc(%arg0, %arg1) {test.var = "dynamic_2d"} : memref<?x?xf32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_mixed_dims : memref<10x?xf32> init { +// CHECK: ^bb0(%[[ARG:.*]]: memref<10x?xf32>): +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM1:.*]] = memref.dim %[[ARG]], %[[C1]] : memref<10x?xf32> +// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM1]]) : memref<10x?xf32> +// CHECK: acc.yield %[[ALLOC]] : memref<10x?xf32> +// CHECK: } destroy { +// CHECK: ^bb0(%{{.*}}: memref<10x?xf32>, %[[VAL:.*]]: memref<10x?xf32>): +// CHECK: memref.dealloc %[[VAL]] : memref<10x?xf32> +// CHECK: acc.terminator +// CHECK: } + +func.func @test_mixed_dims(%arg0: index) { + %0 = memref.alloc(%arg0) {test.var = "mixed_dims"} : memref<10x?xf32> + return +} + +// ----- + +// CHECK: acc.private.recipe @private_scalar_int : memref<i32> init { +// CHECK: ^bb0(%{{.*}}: memref<i32>): +// CHECK: %[[ALLOC:.*]] = memref.alloca() : memref<i32> +// CHECK: acc.yield %[[ALLOC]] : memref<i32> +// CHECK: } +// CHECK-NOT: destroy + +func.func @test_scalar_int() { + %0 = memref.alloca() {test.var = "scalar_int"} : memref<i32> + return +} + diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir index a1067ec..af09dc8 100644 --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -8,11 +8,11 @@ // Test bufferization using memref types that have no layout map. // RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-return-allocs-from-loops unknown-type-conversion=identity-layout-map function-boundary-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null -// CHECK-LABEL: func @scf_for_yield_only( +// CHECK-LABEL: func private @scf_for_yield_only( // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: ) -> memref<?xf32> { -func.func @scf_for_yield_only( +func.func private @scf_for_yield_only( %A : tensor<?xf32> {bufferization.writable = false}, %B : tensor<?xf32> {bufferization.writable = true}, %lb : index, %ub : index, %step : index) @@ -85,11 +85,11 @@ func.func @nested_scf_for(%A : tensor<?xf32> {bufferization.writable = true}, // ----- -// CHECK-LABEL: func @scf_for_with_tensor.insert_slice +// CHECK-LABEL: func private @scf_for_with_tensor.insert_slice // CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[B:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>> // CHECK-SAME: %[[C:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>> -func.func @scf_for_with_tensor.insert_slice( +func.func private @scf_for_with_tensor.insert_slice( %A : tensor<?xf32> {bufferization.writable = false}, %B : tensor<?xf32> {bufferization.writable = true}, %C : tensor<4xf32> {bufferization.writable = false}, @@ -471,11 +471,11 @@ func.func @scf_while_iter_arg_result_mismatch(%arg0: tensor<5xi1>, // ----- -// CHECK-LABEL: func.func @parallel_insert_slice_no_conflict( +// CHECK-LABEL: func private @parallel_insert_slice_no_conflict( // CHECK-SAME: %[[idx:.*]]: index, %[[idx2:.*]]: index, // CHECK-SAME: %[[arg1:.*]]: memref<?xf32, strided{{.*}}>, // CHECK-SAME: %[[arg2:.*]]: memref<?xf32, strided{{.*}}> -func.func @parallel_insert_slice_no_conflict( +func.func private @parallel_insert_slice_no_conflict( %idx: index, %idx2: index, %arg1: tensor<?xf32> {bufferization.writable = true}, diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir index 5f95da2..b6c72be 100644 --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -8,12 +8,12 @@ // Test bufferization using memref types that have no layout map. // RUN: mlir-opt %s -one-shot-bufferize="unknown-type-conversion=identity-layout-map bufferize-function-boundaries" -split-input-file -o /dev/null -// CHECK-LABEL: func @insert_slice_fun +// CHECK-LABEL: func private @insert_slice_fun // CHECK-SAME: %[[A0:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[A1:[a-zA-Z0-9]*]]: memref<?xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[t0:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>>, // CHECK-SAME: %[[t1:[a-zA-Z0-9]*]]: memref<4xf32, strided<[?], offset: ?>> -func.func @insert_slice_fun( +func.func private @insert_slice_fun( %A0 : tensor<?xf32> {bufferization.writable = false}, %A1 : tensor<?xf32> {bufferization.writable = true}, %t0 : tensor<4xf32> {bufferization.writable = false}, @@ -331,12 +331,12 @@ func.func @dim_not_reading(%t: tensor<?xf32>, %f: f32, %pos: index) // ----- // CHECK: #[[$map:.*]] = affine_map<(d0) -> (d0 + 5)> -// CHECK-LABEL: func.func @cast_retains_buffer_layout( +// CHECK-LABEL: func.func private @cast_retains_buffer_layout( // CHECK-SAME: %[[t:.*]]: memref<?xf32, #[[$map]]>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> { // CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, #[[$map]]> to memref<10xf32, #[[$map]]> // CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, #[[$map]]> to memref<?xf32, strided<[1], offset: 7>> // CHECK: return %[[slice]] -func.func @cast_retains_buffer_layout( +func.func private @cast_retains_buffer_layout( %t: tensor<?xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0 + 5)>}, %sz: index) @@ -353,12 +353,12 @@ func.func @cast_retains_buffer_layout( // ----- -// CHECK-LABEL: func.func @cast_retains_buffer_layout_strided( +// CHECK-LABEL: func private @cast_retains_buffer_layout_strided( // CHECK-SAME: %[[t:.*]]: memref<?xf32, strided<[1], offset: 5>>, %[[sz:.*]]: index) -> memref<?xf32, strided<[1], offset: 7>> { // CHECK: %[[casted:.*]] = memref.cast %[[t]] : memref<?xf32, strided<[1], offset: 5>> to memref<10xf32, strided<[1], offset: 5>> // CHECK: %[[slice:.*]] = memref.subview %[[casted]][2] [%[[sz]]] [1] : memref<10xf32, strided<[1], offset: 5>> to memref<?xf32, strided<[1], offset: 7>> // CHECK: return %[[slice]] -func.func @cast_retains_buffer_layout_strided( +func.func private @cast_retains_buffer_layout_strided( %t: tensor<?xf32> {bufferization.buffer_layout = strided<[1], offset: 5>}, %sz: index) diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir new file mode 100644 index 0000000..023a0e5 --- /dev/null +++ b/mlir/test/Dialect/Vector/canonicalize/vector-step.mlir @@ -0,0 +1,311 @@ +// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file | FileCheck %s + +///===----------------------------------------------===// +/// Tests of `StepCompareFolder` +///===----------------------------------------------===// + + +///===------------------------------------===// +/// Tests of `ugt` (unsigned greater than) +///===------------------------------------===// + +// CHECK-LABEL: @ugt_constant_3_lhs +// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @ugt_constant_3_lhs() -> vector<3xi1> { + %cst = arith.constant dense<3> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // 3 > [0, 1, 2] => [true, true, true] => true for all indices => fold + %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_ugt_constant_2_lhs +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_ugt_constant_2_lhs() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // 2 > [0, 1, 2] => [true, true, false] => not same for all indices => don't fold + %1 = arith.cmpi ugt, %cst, %0 : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @ugt_constant_3_rhs +// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @ugt_constant_3_rhs() -> vector<3xi1> { + %cst = arith.constant dense<3> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // [0, 1, 2] > 3 => [false, false, false] => false for all indices => fold + %1 = arith.cmpi ugt, %0, %cst : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @ugt_constant_max_rhs +// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @ugt_constant_max_rhs() -> vector<3xi1> { + // The largest i64 possible: + %cst = arith.constant dense<0x7fffffffffffffff> : vector<3xindex> + %0 = vector.step : vector<3xindex> + %1 = arith.cmpi ugt, %0, %cst: vector<3xindex> + return %1 : vector<3xi1> +} + + +// ----- + +// CHECK-LABEL: @ugt_constant_2_rhs +// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @ugt_constant_2_rhs() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // [0, 1, 2] > 2 => [false, false, false] => false for all indices => fold + %1 = arith.cmpi ugt, %0, %cst : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_ugt_constant_1_rhs +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_ugt_constant_1_rhs() -> vector<3xi1> { + %cst = arith.constant dense<1> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // [0, 1, 2] > 1 => [false, false, true] => not same for all indices => don't fold + %1 = arith.cmpi ugt, %0, %cst: vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +///===------------------------------------===// +/// Tests of `uge` (unsigned greater than or equal) +///===------------------------------------===// + + +// CHECK-LABEL: @uge_constant_2_lhs +// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @uge_constant_2_lhs() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // 2 >= [0, 1, 2] => [true, true, true] => true for all indices => fold + %1 = arith.cmpi uge, %cst, %0 : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_uge_constant_1_lhs +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_uge_constant_1_lhs() -> vector<3xi1> { + %cst = arith.constant dense<1> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // 1 >= [0, 1, 2] => [true, false, false] => not same for all indices => don't fold + %1 = arith.cmpi uge, %cst, %0 : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @uge_constant_3_rhs +// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @uge_constant_3_rhs() -> vector<3xi1> { + %cst = arith.constant dense<3> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // [0, 1, 2] >= 3 => [false, false, false] => false for all indices => fold + %1 = arith.cmpi uge, %0, %cst : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_uge_constant_2_rhs +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_uge_constant_2_rhs() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // [0, 1, 2] >= 2 => [false, false, true] => not same for all indices => don't fold + %1 = arith.cmpi uge, %0, %cst : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + + +///===------------------------------------===// +/// Tests of `ult` (unsigned less than) +///===------------------------------------===// + + +// CHECK-LABEL: @ult_constant_2_lhs +// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @ult_constant_2_lhs() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // 2 < [0, 1, 2] => [false, false, false] => false for all indices => fold + %1 = arith.cmpi ult, %cst, %0 : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_ult_constant_1_lhs +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_ult_constant_1_lhs() -> vector<3xi1> { + %cst = arith.constant dense<1> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // 1 < [0, 1, 2] => [false, false, true] => not same for all indices => don't fold + %1 = arith.cmpi ult, %cst, %0 : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @ult_constant_3_rhs +// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @ult_constant_3_rhs() -> vector<3xi1> { + %cst = arith.constant dense<3> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // [0, 1, 2] < 3 => [true, true, true] => true for all indices => fold + %1 = arith.cmpi ult, %0, %cst : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_ult_constant_2_rhs +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_ult_constant_2_rhs() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + // [0, 1, 2] < 2 => [true, true, false] => not same for all indices => don't fold + %1 = arith.cmpi ult, %0, %cst : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +///===------------------------------------===// +/// Tests of `ule` (unsigned less than or equal) +///===------------------------------------===// + +// CHECK-LABEL: @ule_constant_3_lhs +// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @ule_constant_3_lhs() -> vector<3xi1> { + %cst = arith.constant dense<3> : vector<3xindex> + %0 = vector.step : vector<3xindex> + %1 = arith.cmpi ule, %cst, %0 : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_ule_constant_2_lhs +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_ule_constant_2_lhs() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + %1 = arith.cmpi ule, %cst, %0 : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @ule_constant_2_rhs +// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @ule_constant_2_rhs() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + %1 = arith.cmpi ule, %0, %cst : vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_ule_constant_1_rhs +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_ule_constant_1_rhs() -> vector<3xi1> { + %cst = arith.constant dense<1> : vector<3xindex> + %0 = vector.step : vector<3xindex> + %1 = arith.cmpi ule, %0, %cst: vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +///===------------------------------------===// +/// Tests of `eq` (equal) +///===------------------------------------===// + +// CHECK-LABEL: @eq_constant_3 +// CHECK: %[[CST:.*]] = arith.constant dense<false> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @eq_constant_3() -> vector<3xi1> { + %cst = arith.constant dense<3> : vector<3xindex> + %0 = vector.step : vector<3xindex> + %1 = arith.cmpi eq, %0, %cst: vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_eq_constant_2 +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_eq_constant_2() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + %1 = arith.cmpi eq, %0, %cst: vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +///===------------------------------------===// +/// Tests of `ne` (not equal) +///===------------------------------------===// + +// CHECK-LABEL: @ne_constant_3 +// CHECK: %[[CST:.*]] = arith.constant dense<true> : vector<3xi1> +// CHECK: return %[[CST]] : vector<3xi1> +func.func @ne_constant_3() -> vector<3xi1> { + %cst = arith.constant dense<3> : vector<3xindex> + %0 = vector.step : vector<3xindex> + %1 = arith.cmpi ne, %0, %cst: vector<3xindex> + return %1 : vector<3xi1> +} + +// ----- + +// CHECK-LABEL: @negative_ne_constant_2 +// CHECK: %[[CMP:.*]] = arith.cmpi +// CHECK: return %[[CMP]] +func.func @negative_ne_constant_2() -> vector<3xi1> { + %cst = arith.constant dense<2> : vector<3xindex> + %0 = vector.step : vector<3xindex> + %1 = arith.cmpi ne, %0, %cst: vector<3xindex> + return %1 : vector<3xi1> +} + diff --git a/mlir/test/Dialect/Vector/vector-unroll-options.mlir b/mlir/test/Dialect/Vector/vector-unroll-options.mlir index 35db14e..e5a98b5 100644 --- a/mlir/test/Dialect/Vector/vector-unroll-options.mlir +++ b/mlir/test/Dialect/Vector/vector-unroll-options.mlir @@ -188,15 +188,38 @@ func.func @vector_fma(%a: vector<4x4xf32>, %b: vector<4x4xf32>, %c: vector<4x4xf // CHECK-LABEL: func @vector_fma // CHECK-COUNT-4: vector.fma %{{.+}}, %{{.+}}, %{{.+}} : vector<2x2xf32> -// TODO: We should be able to unroll this like the example above - this will require extending UnrollElementwisePattern. -func.func @negative_vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{ +func.func @vector_fma_3d(%a: vector<3x2x2xf32>) -> vector<3x2x2xf32>{ %0 = vector.fma %a, %a, %a : vector<3x2x2xf32> return %0 : vector<3x2x2xf32> } -// CHECK-LABEL: func @negative_vector_fma_3d -// CHECK-NOT: vector.extract_strided_slice -// CHECK: %[[R0:.*]] = vector.fma %{{.+}} : vector<3x2x2xf32> -// CHECK: return +// CHECK-LABEL: func @vector_fma_3d +// CHECK-SAME: (%[[SRC:.*]]: vector<3x2x2xf32>) -> vector<3x2x2xf32> { +// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<3x2x2xf32> +// CHECK: %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[E_OUT_0:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_OUT_0:.*]] = vector.shape_cast %[[E_OUT_0]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[FMA0:.*]] = vector.fma %[[S_LHS_0]], %[[S_RHS_0]], %[[S_OUT_0]] : vector<2x2xf32> +// CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[FMA0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32> +// CHECK: %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[E_OUT_1:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_OUT_1:.*]] = vector.shape_cast %[[E_OUT_1]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[FMA1:.*]] = vector.fma %[[S_LHS_1]], %[[S_RHS_1]], %[[S_OUT_1]] : vector<2x2xf32> +// CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[FMA1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32> +// CHECK: %[[E_LHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_LHS_2:.*]] = vector.shape_cast %[[E_LHS_2]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[E_RHS_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_RHS_2:.*]] = vector.shape_cast %[[E_RHS_2]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[E_OUT_2:.*]] = vector.extract_strided_slice %[[SRC]] {offsets = [2, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<3x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_OUT_2:.*]] = vector.shape_cast %[[E_OUT_2]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[FMA2:.*]] = vector.fma %[[S_LHS_2]], %[[S_RHS_2]], %[[S_OUT_2]] : vector<2x2xf32> +// CHECK: %[[I2:.*]] = vector.insert_strided_slice %[[FMA2]], %[[I1]] {offsets = [2, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<3x2x2xf32> +// CHECK: return %[[I2]] : vector<3x2x2xf32> func.func @vector_multi_reduction(%v : vector<4x6xf32>, %acc: vector<4xf32>) -> vector<4xf32> { %0 = vector.multi_reduction #vector.kind<add>, %v, %acc [1] : vector<4x6xf32> to vector<4xf32> @@ -440,3 +463,36 @@ func.func @vector_step() -> vector<32xindex> { // CHECK: %[[ADD3:.*]] = arith.addi %[[STEP]], %[[CST]] : vector<8xindex> // CHECK: %[[INS3:.*]] = vector.insert_strided_slice %[[ADD3]], %[[INS2]] {offsets = [24], strides = [1]} : vector<8xindex> into vector<32xindex> // CHECK: return %[[INS3]] : vector<32xindex> + + +func.func @elementwise_3D_to_2D(%v1: vector<2x2x2xf32>, %v2: vector<2x2x2xf32>) -> vector<2x2x2xf32> { + %0 = arith.addf %v1, %v2 : vector<2x2x2xf32> + return %0 : vector<2x2x2xf32> +} +// CHECK-LABEL: func @elementwise_3D_to_2D +// CHECK-SAME: (%[[ARG0:.*]]: vector<2x2x2xf32>, %[[ARG1:.*]]: vector<2x2x2xf32>) -> vector<2x2x2xf32> { +// CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<2x2x2xf32> +// CHECK: %[[E_LHS_0:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_LHS_0:.*]] = vector.shape_cast %[[E_LHS_0]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[E_RHS_0:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [0, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_RHS_0:.*]] = vector.shape_cast %[[E_RHS_0]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[ADD0:.*]] = arith.addf %[[S_LHS_0]], %[[S_RHS_0]] : vector<2x2xf32> +// CHECK: %[[I0:.*]] = vector.insert_strided_slice %[[ADD0]], %[[CST]] {offsets = [0, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32> +// CHECK: %[[E_LHS_1:.*]] = vector.extract_strided_slice %[[ARG0]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_LHS_1:.*]] = vector.shape_cast %[[E_LHS_1]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[E_RHS_1:.*]] = vector.extract_strided_slice %[[ARG1]] {offsets = [1, 0, 0], sizes = [1, 2, 2], strides = [1, 1, 1]} : vector<2x2x2xf32> to vector<1x2x2xf32> +// CHECK: %[[S_RHS_1:.*]] = vector.shape_cast %[[E_RHS_1]] : vector<1x2x2xf32> to vector<2x2xf32> +// CHECK: %[[ADD1:.*]] = arith.addf %[[S_LHS_1]], %[[S_RHS_1]] : vector<2x2xf32> +// CHECK: %[[I1:.*]] = vector.insert_strided_slice %[[ADD1]], %[[I0]] {offsets = [1, 0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x2x2xf32> +// CHECK: return %[[I1]] : vector<2x2x2xf32> + + +func.func @elementwise_4D_to_2D(%v1: vector<2x2x2x2xf32>, %v2: vector<2x2x2x2xf32>) -> vector<2x2x2x2xf32> { + %0 = arith.addf %v1, %v2 : vector<2x2x2x2xf32> + return %0 : vector<2x2x2x2xf32> +} + +// CHECK-LABEL: func @elementwise_4D_to_2D +// CHECK-COUNT-4: arith.addf %{{.*}}, %{{.*}} : vector<2x2xf32> +// CHECK-NOT: arith.addf +// CHECK: return diff --git a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir index bb76392..401cdd29 100644 --- a/mlir/test/Dialect/Vector/vector-warp-distribute.mlir +++ b/mlir/test/Dialect/Vector/vector-warp-distribute.mlir @@ -1925,3 +1925,22 @@ func.func @warp_scf_if_distribute(%pred : i1) { // CHECK-PROP: "some_use"(%[[IF_YIELD_DIST]]) : (vector<1xf32>) -> () // CHECK-PROP: return // CHECK-PROP: } + +// ----- +func.func @dedup_unused_result(%laneid : index) -> (vector<1xf32>) { + %r:3 = gpu.warp_execute_on_lane_0(%laneid)[32] -> + (vector<1xf32>, vector<2xf32>, vector<1xf32>) { + %2 = "some_def"() : () -> (vector<32xf32>) + %3 = "some_def"() : () -> (vector<64xf32>) + gpu.yield %2, %3, %2 : vector<32xf32>, vector<64xf32>, vector<32xf32> + } + %r0 = "some_use"(%r#2, %r#2) : (vector<1xf32>, vector<1xf32>) -> (vector<1xf32>) + return %r0 : vector<1xf32> +} + +// CHECK-PROP: func @dedup_unused_result +// CHECK-PROP: %[[R:.*]] = gpu.warp_execute_on_lane_0(%arg0)[32] -> (vector<1xf32>) +// CHECK-PROP: %[[Y0:.*]] = "some_def"() : () -> vector<32xf32> +// CHECK-PROP: %[[Y1:.*]] = "some_def"() : () -> vector<64xf32> +// CHECK-PROP: gpu.yield %[[Y0]] : vector<32xf32> +// CHECK-PROP: "some_use"(%[[R]], %[[R]]) : (vector<1xf32>, vector<1xf32>) -> vector<1xf32> diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir index 9d63c2d..fe4f44c 100644 --- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir +++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir @@ -584,3 +584,101 @@ gpu.module @test_kernel { gpu.return } } + +// ----- +gpu.module @test_kernel { + // CHECK-LABEL: load_with_offsets + // CHECK-COUNT-2: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16xf32> + gpu.func @load_with_offsets(%src: ui64) -> vector<32xf32> { + %cst = arith.constant dense<[ + 0, 8, 16, 24, 32, 40, 48, 56, + 64, 72, 80, 88, 96, 104, 112, 120, + 128, 136, 144, 152, 160, 168, 176, 184, + 192, 200, 208, 216, 224, 232, 240, 248 + ]> : vector<32xindex> + + %c17 = arith.constant 17: index + %mask = vector.create_mask %c17: vector<32xi1> + %ld = xegpu.load %src[%cst], %mask {chunk_size = 1, layout_result_0 = #xegpu.layout<inst_data = [16]>, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32xf32> + + gpu.return %ld : vector<32xf32> + } +} + +// ----- +gpu.module @test_kernel { + // CHECK-LABEL: store_with_offsets + // CHECK-COUNT-2: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16xf32>, ui64, vector<16xindex>, vector<16xi1> + gpu.func @store_with_offsets(%src: ui64) { + %cst = arith.constant dense<[ + 0, 8, 16, 24, 32, 40, 48, 56, + 64, 72, 80, 88, 96, 104, 112, 120, + 128, 136, 144, 152, 160, 168, 176, 184, + 192, 200, 208, 216, 224, 232, 240, 248 + ]> : vector<32xindex> + + %c17 = arith.constant 17: index + %mask = vector.create_mask %c17: vector<32xi1> + + %st_vec = arith.constant dense<1023.0>: vector<32xf32> + xegpu.store %st_vec, %src[%cst], %mask {chunk_size = 1, layout_operand_0 = #xegpu.layout<inst_data = [16]>, + layout_operand_2 = #xegpu.layout<inst_data = [16]>, + layout_operand_3 = #xegpu.layout<inst_data = [16]>, + l1_hint = #xegpu.cache_hint<cached>} : vector<32xf32>, ui64, vector<32xindex>, vector<32xi1> + + gpu.return + } +} + +// ----- +gpu.module @test_kernel { + // CHECK-LABEL: load_with_offsets_chunk + // CHECK: [[cst:%.+]] = arith.constant dense<0.000000e+00> : vector<32x4xf32> + // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex> + // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex> + // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex> + // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex> + // CHECK-COUNT-4: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16x2xf32> + gpu.func @load_with_offsets_chunk(%src: ui64) -> vector<32x4xf32> { + %cst = arith.constant dense<[ + 0, 8, 16, 24, 32, 40, 48, 56, + 64, 72, 80, 88, 96, 104, 112, 120, + 128, 136, 144, 152, 160, 168, 176, 184, + 192, 200, 208, 216, 224, 232, 240, 248 + ]> : vector<32xindex> + + %c17 = arith.constant 17: index + %mask = vector.create_mask %c17: vector<32xi1> + %ld = xegpu.load %src[%cst], %mask {chunk_size = 4, layout_result_0 = #xegpu.layout<inst_data = [16, 2]>, l1_hint = #xegpu.cache_hint<cached>} : ui64, vector<32xindex>, vector<32xi1> -> vector<32x4xf32> + gpu.return %ld : vector<32x4xf32> + } +} + +// ----- +gpu.module @test_kernel { + // CHECK-LABEL: store_with_offsets_chunk + // CHECK: [[cst:%.+]] = arith.constant dense<1.023000e+03> : vector<16x2xf32 + // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex> + // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex> + // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex> + // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex> + // CHECK-COUNT-4: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16x2xf32>, ui64, vector<16xindex>, vector<16xi1> + gpu.func @store_with_offsets_chunk(%src: ui64) { + %cst = arith.constant dense<[ + 0, 8, 16, 24, 32, 40, 48, 56, + 64, 72, 80, 88, 96, 104, 112, 120, + 128, 136, 144, 152, 160, 168, 176, 184, + 192, 200, 208, 216, 224, 232, 240, 248 + ]> : vector<32xindex> + + %c17 = arith.constant 17: index + %mask = vector.create_mask %c17: vector<32xi1> + + %st_vec = arith.constant dense<1023.>: vector<32x4xf32> + xegpu.store %st_vec, %src[%cst], %mask {chunk_size = 4, layout_operand_0 = #xegpu.layout<inst_data = [16, 2]>, + layout_operand_2 = #xegpu.layout<inst_data = [16, 2]>, + layout_operand_3 = #xegpu.layout<inst_data = [16, 2]>, + l1_hint = #xegpu.cache_hint<cached>} : vector<32x4xf32>, ui64, vector<32xindex>, vector<32xi1> + gpu.return + } +} diff --git a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir index cb16c37..b3154d4 100644 --- a/mlir/test/Integration/GPU/SPIRV/simple_add.mlir +++ b/mlir/test/Integration/GPU/SPIRV/simple_add.mlir @@ -3,7 +3,16 @@ // RUN: | FileCheck %s // CHECK: data = -// CHECK-RAW: [[[7.7, 0, 0], [7.7, 0, 0], [7.7, 0, 0]], [[0, 7.7, 0], [0, 7.7, 0], [0, 7.7, 0]], [[0, 0, 7.7], [0, 0, 7.7], [0, 0, 7.7]]] +// CHECK{LITERAL}: [[[7.7, 0, 0], +// CHECK{LITERAL}: [7.7, 0, 0], +// CHECK{LITERAL}: [7.7, 0, 0]], +// CHECK{LITERAL}: [[0, 7.7, 0], +// CHECK{LITERAL}: [0, 7.7, 0], +// CHECK{LITERAL}: [0, 7.7, 0]], +// CHECK{LITERAL}: [[0, 0, 7.7], +// CHECK{LITERAL}: [0, 0, 7.7], +// CHECK{LITERAL}: [0, 0, 7.7]]] + module attributes { gpu.container_module, spirv.target_env = #spirv.target_env< diff --git a/mlir/test/Pass/remark-final.mlir b/mlir/test/Pass/remark-final.mlir new file mode 100644 index 0000000..325271e --- /dev/null +++ b/mlir/test/Pass/remark-final.mlir @@ -0,0 +1,17 @@ +// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final 2>&1 | FileCheck %s +// RUN: mlir-opt %s --test-remark --remarks-filter="category.*" --remark-policy=final --remark-format=yaml --remarks-output-file=%t.yaml +// RUN: FileCheck --check-prefix=CHECK-YAML %s < %t.yaml +module @foo { + "test.op"() : () -> () + +} + +// CHECK-YAML-NOT: This is a test passed remark (should be dropped) +// CHECK-YAML-DAG: !Analysis +// CHECK-YAML-DAG: !Failure +// CHECK-YAML-DAG: !Passed + +// CHECK-NOT: This is a test passed remark (should be dropped) +// CHECK-DAG: remark: [Analysis] test-remark +// CHECK-DAG: remark: [Failure] test-remark | Category:category-2-failed +// CHECK-DAG: remark: [Passed] test-remark | Category:category-1-passed diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll index e056e43..61376b8 100644 --- a/mlir/test/Target/LLVMIR/Import/debug-info.ll +++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll @@ -240,11 +240,10 @@ define void @subprogram() !dbg !3 { define void @func_loc() !dbg !3 { ret void } -; CHECK-DAG: #[[NAME_LOC:.+]] = loc("func_loc") ; CHECK-DAG: #[[FILE_LOC:.+]] = loc("debug-info.ll":42:0) ; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram<id = distinct[{{.*}}]<>, compileUnit = #{{.*}}, scope = #{{.*}}, name = "func_loc", file = #{{.*}}, line = 42, subprogramFlags = Definition> -; CHECK: loc(fused<#[[SP]]>[#[[NAME_LOC]], #[[FILE_LOC]]] +; CHECK: loc(fused<#[[SP]]>[#[[FILE_LOC]]] !llvm.dbg.cu = !{!1} !llvm.module.flags = !{!0} diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir new file mode 100644 index 0000000..04e2ddf --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/convert_fp4x2.mlir @@ -0,0 +1,12 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// CHECK-LABEL: @convert_f32x2_to_f4x2_e2m1 +llvm.func @convert_f32x2_to_f4x2_e2m1(%srcA : f32, %srcB : f32) { + // CHECK: %[[res1:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.satfinite(float %{{.*}}, float %{{.*}}) + // CHECK-NEXT: %{{.*}} = trunc i16 %[[res1]] to i8 + %res1 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB : i8 (f4E2M1FN) + // CHECK: %[[res2:.*]] = call i16 @llvm.nvvm.ff.to.e2m1x2.rn.relu.satfinite(float %{{.*}}, float %{{.*}}) + // CHECK-NEXT: %{{.*}} = trunc i16 %[[res2]] to i8 + %res2 = nvvm.convert.f32x2.to.f4x2 %srcA, %srcB {relu = true} : i8 (f4E2M1FN) + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir index 0b36154..b2fe2f5 100644 --- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir @@ -254,6 +254,14 @@ llvm.func @nvvm_cvt_f32x2_to_f6x2_invalid_type(%a : f32, %b : f32) { // ----- +llvm.func @nvvm_cvt_f32x2_to_f4x2_invalid_type(%a : f32, %b : f32) { + // expected-error @below {{Only 'f4E2M1FN' type is supported for conversions from f32x2 to f4x2.}} + %res = nvvm.convert.f32x2.to.f4x2 %a, %b : i8 (f8E4M3FN) + llvm.return +} + +// ----- + llvm.func @nvvm_prefetch_L1_with_evict_priority(%global_ptr: !llvm.ptr<1>) { // expected-error @below {{cache eviction priority supported only for cache level L2}} nvvm.prefetch level = L1, evict_priority = evict_last, %global_ptr : !llvm.ptr<1> @@ -559,3 +567,13 @@ llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_invalid_return_typ %res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i1 llvm.return } + + +// ----- + +// Test for range validation - invalid range where lower == upper but not at extremes +func.func @invalid_range_equal_bounds() { + // expected-error @below {{invalid range attribute: Lower == Upper, but they aren't min (0) or max (4294967295) value! This is an invalid constant range.}} + %0 = nvvm.read.ptx.sreg.warpsize range <i32, 32, 32> : i32 + return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 00a479d..594ae48 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -152,6 +152,10 @@ llvm.func @nvvm_special_regs() -> i32 { %74 = nvvm.read.ptx.sreg.lanemask.ge : i32 //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt %75 = nvvm.read.ptx.sreg.lanemask.gt : i32 + // CHECK: %76 = call range(i32 0, 0) i32 @llvm.nvvm.read.ptx.sreg.tid.x() + %76 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 0> : i32 + // CHECK: %77 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() + %77 = nvvm.read.ptx.sreg.tid.x range <i32, 4294967295, 4294967295> : i32 llvm.return %1 : i32 } diff --git a/mlir/test/lib/Analysis/CMakeLists.txt b/mlir/test/lib/Analysis/CMakeLists.txt index 9187998..c37671a 100644 --- a/mlir/test/lib/Analysis/CMakeLists.txt +++ b/mlir/test/lib/Analysis/CMakeLists.txt @@ -17,6 +17,7 @@ add_mlir_library(MLIRTestAnalysis DataFlow/TestDenseForwardDataFlowAnalysis.cpp DataFlow/TestLivenessAnalysis.cpp DataFlow/TestSparseBackwardDataFlowAnalysis.cpp + DataFlow/TestStridedMetadataRangeAnalysis.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp new file mode 100644 index 0000000..6ac09fd --- /dev/null +++ b/mlir/test/lib/Analysis/DataFlow/TestStridedMetadataRangeAnalysis.cpp @@ -0,0 +1,86 @@ +//===- TestStridedMetadataRangeAnalysis.cpp - Test strided md analysis ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/DataFlow/ConstantPropagationAnalysis.h" +#include "mlir/Analysis/DataFlow/DeadCodeAnalysis.h" +#include "mlir/Analysis/DataFlow/IntegerRangeAnalysis.h" +#include "mlir/Analysis/DataFlow/StridedMetadataRangeAnalysis.h" +#include "mlir/Analysis/DataFlowFramework.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; +using namespace mlir::dataflow; + +static void printAnalysisResults(DataFlowSolver &solver, Operation *op, + raw_ostream &os) { + // Collect the strided metadata of the op results. + SmallVector<std::pair<unsigned, const StridedMetadataRangeLattice *>> results; + for (OpResult result : op->getResults()) { + const auto *state = solver.lookupState<StridedMetadataRangeLattice>(result); + // Skip the result if it's uninitialized. + if (!state || state->getValue().isUninitialized()) + continue; + + // Skip the result if the range is empty. + const mlir::StridedMetadataRange &md = state->getValue(); + if (md.getOffsets().empty() && md.getSizes().empty() && + md.getStrides().empty()) + continue; + results.push_back({result.getResultNumber(), state}); + } + + // Early exit if there's no metadata to print. + if (results.empty()) + return; + + // Print the metadata. + os << "Op: " << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "\n"; + for (auto [idx, state] : results) + os << " result[" << idx << "]: " << state->getValue() << "\n"; + os << "\n"; +} + +namespace { +struct TestStridedMetadataRangeAnalysisPass + : public PassWrapper<TestStridedMetadataRangeAnalysisPass, + OperationPass<>> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( + TestStridedMetadataRangeAnalysisPass) + + StringRef getArgument() const override { + return "test-strided-metadata-range-analysis"; + } + void runOnOperation() override { + Operation *op = getOperation(); + + DataFlowSolver solver; + solver.load<DeadCodeAnalysis>(); + solver.load<SparseConstantPropagation>(); + solver.load<IntegerRangeAnalysis>(); + solver.load<StridedMetadataRangeAnalysis>(); + if (failed(solver.initializeAndRun(op))) + return signalPassFailure(); + + op->walk( + [&](Operation *op) { printAnalysisResults(solver, op, llvm::errs()); }); + } +}; +} // end anonymous namespace + +namespace mlir { +namespace test { +void registerTestStridedMetadataRangeAnalysisPass() { + PassRegistration<TestStridedMetadataRangeAnalysisPass>(); +} +} // end namespace test +} // end namespace mlir diff --git a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp index 1e2d4a7..4069a74 100644 --- a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp +++ b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp @@ -11,11 +11,25 @@ #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h" #include "mlir/Dialect/Bufferization/Transforms/Transforms.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Pass/Pass.h" +#include "TestAttributes.h" // TestTensorEncodingAttr, TestMemRefLayoutAttr +#include "TestDialect.h" + using namespace mlir; namespace { +MemRefLayoutAttrInterface +getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) { + if (auto encoding = dyn_cast_if_present<test::TestTensorEncodingAttr>( + tensorType.getEncoding())) { + return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get( + tensorType.getContext(), encoding.getDummy())); + } + return {}; +} + struct TestOneShotModuleBufferizePass : public PassWrapper<TestOneShotModuleBufferizePass, OperationPass<>> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestOneShotModuleBufferizePass) @@ -25,6 +39,7 @@ struct TestOneShotModuleBufferizePass : PassWrapper(pass) {} void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert<test::TestDialect>(); registry.insert<bufferization::BufferizationDialect>(); } StringRef getArgument() const final { @@ -41,6 +56,17 @@ struct TestOneShotModuleBufferizePass bufferization::OneShotBufferizationOptions opt; opt.bufferizeFunctionBoundaries = true; + opt.functionArgTypeConverterFn = + [&](bufferization::TensorLikeType tensor, Attribute memSpace, + func::FuncOp, const bufferization::BufferizationOptions &) { + assert(isa<RankedTensorType>(tensor) && "tests only builtin tensors"); + auto tensorType = cast<RankedTensorType>(tensor); + auto layout = getMemRefLayoutForTensorEncoding(tensorType); + return cast<bufferization::BufferLikeType>( + MemRefType::get(tensorType.getShape(), + tensorType.getElementType(), layout, memSpace)); + }; + bufferization::BufferizationState bufferizationState; if (failed(bufferization::runOneShotModuleBufferize(getOperation(), opt, diff --git a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt index f84055d..1e59338 100644 --- a/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt +++ b/mlir/test/lib/Dialect/OpenACC/CMakeLists.txt @@ -1,6 +1,7 @@ add_mlir_library(MLIROpenACCTestPasses TestOpenACC.cpp TestPointerLikeTypeInterface.cpp + TestRecipePopulate.cpp EXCLUDE_FROM_LIBMLIR ) diff --git a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp index 9886240..bea21b9 100644 --- a/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp +++ b/mlir/test/lib/Dialect/OpenACC/TestOpenACC.cpp @@ -15,9 +15,13 @@ namespace test { // Forward declarations of individual test pass registration functions void registerTestPointerLikeTypeInterfacePass(); +void registerTestRecipePopulatePass(); // Unified registration function for all OpenACC tests -void registerTestOpenACC() { registerTestPointerLikeTypeInterfacePass(); } +void registerTestOpenACC() { + registerTestPointerLikeTypeInterfacePass(); + registerTestRecipePopulatePass(); +} } // namespace test } // namespace mlir diff --git a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp index 85f9283..027b0a1 100644 --- a/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp +++ b/mlir/test/lib/Dialect/OpenACC/TestPointerLikeTypeInterface.cpp @@ -196,13 +196,15 @@ void TestPointerLikeTypeInterfacePass::testGenAllocate( newBuilder.setInsertionPointAfter(op); // Call the genAllocate API + bool needsFree = false; Value allocRes = pointerType.genAllocate(newBuilder, loc, "test_alloc", - result.getType(), result); + result.getType(), result, needsFree); if (allocRes) { llvm::errs() << "Successfully generated alloc for operation: "; op->print(llvm::errs()); llvm::errs() << "\n"; + llvm::errs() << "\tneeds free: " << (needsFree ? "true" : "false") << "\n"; // Print all operations that were inserted for (Operation *insertedOp : tracker.insertedOps) { @@ -230,8 +232,8 @@ void TestPointerLikeTypeInterfacePass::testGenFree(Operation *op, Value result, // Call the genFree API auto typedResult = cast<TypedValue<PointerLikeType>>(result); - bool success = - pointerType.genFree(newBuilder, loc, typedResult, result.getType()); + bool success = pointerType.genFree(newBuilder, loc, typedResult, result, + result.getType()); if (success) { llvm::errs() << "Successfully generated free for operation: "; diff --git a/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp new file mode 100644 index 0000000..35f092c --- /dev/null +++ b/mlir/test/lib/Dialect/OpenACC/TestRecipePopulate.cpp @@ -0,0 +1,110 @@ +//===- TestRecipePopulate.cpp - Test Recipe Population -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains test passes for testing the createAndPopulate methods +// of the recipe operations. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/IR/Builders.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Support/CommandLine.h" + +using namespace mlir; +using namespace mlir::acc; + +namespace { + +struct TestRecipePopulatePass + : public PassWrapper<TestRecipePopulatePass, OperationPass<ModuleOp>> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestRecipePopulatePass) + + TestRecipePopulatePass() = default; + TestRecipePopulatePass(const TestRecipePopulatePass &pass) + : PassWrapper(pass) { + recipeType = pass.recipeType; + } + + Pass::Option<std::string> recipeType{ + *this, "recipe-type", + llvm::cl::desc("Recipe type: private or firstprivate"), + llvm::cl::init("private")}; + + StringRef getArgument() const override { return "test-acc-recipe-populate"; } + + StringRef getDescription() const override { + return "Test OpenACC recipe population"; + } + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert<acc::OpenACCDialect>(); + registry.insert<arith::ArithDialect>(); + registry.insert<memref::MemRefDialect>(); + } +}; + +void TestRecipePopulatePass::runOnOperation() { + auto module = getOperation(); + OpBuilder builder(&getContext()); + + // Collect all test variables + SmallVector<std::tuple<Operation *, Value, std::string>> testVars; + + module.walk([&](Operation *op) { + if (auto varName = op->getAttrOfType<StringAttr>("test.var")) { + for (auto result : op->getResults()) { + testVars.push_back({op, result, varName.str()}); + } + } + }); + + // Generate recipes at module level + builder.setInsertionPoint(&module.getBodyRegion().front(), + module.getBodyRegion().front().begin()); + + for (auto [op, var, varName] : testVars) { + Location loc = op->getLoc(); + + std::string recipeName = recipeType.getValue() + "_" + varName; + ValueRange bounds; // No bounds for memref tests + + if (recipeType == "private") { + auto recipe = PrivateRecipeOp::createAndPopulate( + builder, loc, recipeName, var.getType(), varName, bounds); + + if (!recipe) { + op->emitError("Failed to create private recipe for ") << varName; + } + } else if (recipeType == "firstprivate") { + auto recipe = FirstprivateRecipeOp::createAndPopulate( + builder, loc, recipeName, var.getType(), varName, bounds); + + if (!recipe) { + op->emitError("Failed to create firstprivate recipe for ") << varName; + } + } + } +} + +} // namespace + +namespace mlir { +namespace test { + +void registerTestRecipePopulatePass() { + PassRegistration<TestRecipePopulatePass>(); +} + +} // namespace test +} // namespace mlir diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td index 5685004..9e7e4f8 100644 --- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td +++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td @@ -22,6 +22,7 @@ include "mlir/IR/AttrTypeBase.td" include "mlir/IR/BuiltinAttributeInterfaces.td" include "mlir/IR/EnumAttr.td" include "mlir/IR/OpAsmInterface.td" +include "mlir/IR/TensorEncoding.td" // All of the attributes will extend this class. class Test_Attr<string name, list<Trait> traits = []> @@ -439,4 +440,20 @@ def TestCustomStorageCtorAttr : Test_Attr<"TestCustomStorageCtorAttr"> { let hasStorageCustomConstructor = 1; } +def TestTensorEncodingAttr : Test_Attr<"TestTensorEncoding", + [DeclareAttrInterfaceMethods<VerifiableTensorEncoding>]> { + let mnemonic = "tensor_encoding"; + + let parameters = (ins "mlir::StringAttr":$dummy); + let assemblyFormat = "`<` $dummy `>`"; +} + +def TestMemRefLayoutAttr : Test_Attr<"TestMemRefLayout", + [DeclareAttrInterfaceMethods<MemRefLayoutAttrInterface>]> { + let mnemonic = "memref_layout"; + + let parameters = (ins "mlir::StringAttr":$dummy); + let assemblyFormat = "`<` $dummy `>`"; +} + #endif // TEST_ATTRDEFS diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp index fe1e916..9db7b01 100644 --- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp +++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp @@ -542,6 +542,24 @@ test::detail::TestCustomStorageCtorAttrAttrStorage::construct( } //===----------------------------------------------------------------------===// +// TestTensorEncodingAttr +//===----------------------------------------------------------------------===// + +::llvm::LogicalResult TestTensorEncodingAttr::verifyEncoding( + mlir::ArrayRef<int64_t> shape, mlir::Type elementType, + llvm::function_ref<::mlir::InFlightDiagnostic()> emitError) const { + return mlir::success(); +} + +//===----------------------------------------------------------------------===// +// TestMemRefLayoutAttr +//===----------------------------------------------------------------------===// + +mlir::AffineMap TestMemRefLayoutAttr::getAffineMap() const { + return mlir::AffineMap::getMultiDimIdentityMap(1, getContext()); +} + +//===----------------------------------------------------------------------===// // TestDialect //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.h b/mlir/test/lib/Dialect/Test/TestAttributes.h index 778d84fa..0ad5ab6 100644 --- a/mlir/test/lib/Dialect/Test/TestAttributes.h +++ b/mlir/test/lib/Dialect/Test/TestAttributes.h @@ -24,6 +24,7 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/DialectResourceBlobManager.h" +#include "mlir/IR/TensorEncoding.h" // generated files require above includes to come first #include "TestAttrInterfaces.h.inc" diff --git a/mlir/test/lib/Dialect/Test/TestDialect.h b/mlir/test/lib/Dialect/Test/TestDialect.h index f2adca6..bcf3b55d 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.h +++ b/mlir/test/lib/Dialect/Test/TestDialect.h @@ -18,6 +18,7 @@ #include "TestInterfaces.h" #include "TestTypes.h" #include "mlir/Bytecode/BytecodeImplementation.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/DLTI/Traits.h" #include "mlir/Dialect/Func/IR/FuncOps.h" diff --git a/mlir/test/lib/Dialect/Test/TestDialect.td b/mlir/test/lib/Dialect/Test/TestDialect.td index 2b5491f..37a263f 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.td +++ b/mlir/test/lib/Dialect/Test/TestDialect.td @@ -24,7 +24,10 @@ def Test_Dialect : Dialect { let useDefaultTypePrinterParser = 0; let useDefaultAttributePrinterParser = 1; let isExtensible = 1; - let dependentDialects = ["::mlir::DLTIDialect"]; + let dependentDialects = [ + "::mlir::DLTIDialect", + "::mlir::bufferization::BufferizationDialect" + ]; let discardableAttrs = (ins "mlir::IntegerAttr":$discardable_attr_key, "SimpleAAttr":$other_discardable_attr_key diff --git a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp index 53055fe..b211e24 100644 --- a/mlir/test/lib/Dialect/Test/TestOpDefs.cpp +++ b/mlir/test/lib/Dialect/Test/TestOpDefs.cpp @@ -1425,6 +1425,39 @@ TestMultiSlotAlloca::handleDestructuringComplete( return createNewMultiAllocaWithoutSlot(slot, builder, *this); } +namespace { +/// Returns test dialect's memref layout for test dialect's tensor encoding when +/// applicable. +MemRefLayoutAttrInterface +getMemRefLayoutForTensorEncoding(RankedTensorType tensorType) { + if (auto encoding = + dyn_cast<test::TestTensorEncodingAttr>(tensorType.getEncoding())) { + return cast<MemRefLayoutAttrInterface>(test::TestMemRefLayoutAttr::get( + tensorType.getContext(), encoding.getDummy())); + } + return {}; +} + +/// Auxiliary bufferization function for test and builtin tensors. +bufferization::BufferLikeType +convertTensorToBuffer(mlir::Operation *op, + const bufferization::BufferizationOptions &options, + bufferization::TensorLikeType tensorLike) { + auto buffer = + *tensorLike.getBufferType(options, [&]() { return op->emitError(); }); + if (auto memref = dyn_cast<MemRefType>(buffer)) { + // Note: For the sake of testing, we want to ensure that encoding -> layout + // bufferization happens. This is currently achieved manually. + auto layout = + getMemRefLayoutForTensorEncoding(cast<RankedTensorType>(tensorLike)); + return cast<bufferization::BufferLikeType>( + MemRefType::get(memref.getShape(), memref.getElementType(), layout, + memref.getMemorySpace())); + } + return buffer; +} +} // namespace + ::mlir::LogicalResult test::TestDummyTensorOp::bufferize( ::mlir::RewriterBase &rewriter, const ::mlir::bufferization::BufferizationOptions &options, @@ -1435,8 +1468,8 @@ TestMultiSlotAlloca::handleDestructuringComplete( return failure(); const auto outType = getOutput().getType(); - const auto bufferizedOutType = test::TestMemrefType::get( - getContext(), outType.getShape(), outType.getElementType(), nullptr); + const auto bufferizedOutType = + convertTensorToBuffer(getOperation(), options, outType); // replace op with memref analogy auto dummyMemrefOp = test::TestDummyMemrefOp::create( rewriter, getLoc(), bufferizedOutType, *buffer); @@ -1470,13 +1503,12 @@ TestMultiSlotAlloca::handleDestructuringComplete( mlir::FailureOr<mlir::bufferization::BufferLikeType> test::TestCreateTensorOp::getBufferType( - mlir::Value value, const mlir::bufferization::BufferizationOptions &, + mlir::Value value, const mlir::bufferization::BufferizationOptions &options, const mlir::bufferization::BufferizationState &, llvm::SmallVector<::mlir::Value> &) { - const auto type = dyn_cast<test::TestTensorType>(value.getType()); + const auto type = dyn_cast<bufferization::TensorLikeType>(value.getType()); if (type == nullptr) return failure(); - return cast<mlir::bufferization::BufferLikeType>(test::TestMemrefType::get( - getContext(), type.getShape(), type.getElementType(), nullptr)); + return convertTensorToBuffer(getOperation(), options, type); } diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 6329d61..05a33cf 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -32,6 +32,7 @@ include "mlir/Interfaces/MemorySlotInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/ValueBoundsOpInterface.td" include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td" +include "mlir/Dialect/Bufferization/IR/BufferizationTypeInterfaces.td" // Include the attribute definitions. include "TestAttrDefs.td" @@ -2335,7 +2336,7 @@ def SideEffectWithRegionOp : TEST_Op<"side_effect_with_region_op", } //===----------------------------------------------------------------------===// -// Copy Operation Test +// Copy Operation Test //===----------------------------------------------------------------------===// def CopyOp : TEST_Op<"copy", []> { @@ -3676,10 +3677,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op", ["bufferize", "bufferizesToMemoryRead", "bufferizesToMemoryWrite", "getAliasingValues"]>]> { let arguments = (ins - Arg<TestTensorType>:$input + Arg<Bufferization_TensorLikeTypeInterface>:$input ); let results = (outs - Arg<TestTensorType>:$output + Arg<Bufferization_TensorLikeTypeInterface>:$output ); let extraClassDefinition = [{ @@ -3701,10 +3702,10 @@ def TestDummyTensorOp : TEST_Op<"dummy_tensor_op", def TestDummyMemrefOp : TEST_Op<"dummy_memref_op", []> { let arguments = (ins - Arg<TestMemrefType>:$input + Arg<Bufferization_BufferLikeTypeInterface>:$input ); let results = (outs - Arg<TestMemrefType>:$output + Arg<Bufferization_BufferLikeTypeInterface>:$output ); } @@ -3714,7 +3715,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op", "bufferizesToMemoryWrite", "getAliasingValues", "bufferizesToAllocation"]>]> { let arguments = (ins); - let results = (outs Arg<TestTensorType>:$output); + let results = (outs Arg<Bufferization_TensorLikeTypeInterface>:$output); let extraClassDefinition = [{ bool test::TestCreateTensorOp::bufferizesToMemoryRead(::mlir::OpOperand&, const ::mlir::bufferization::AnalysisState&) { @@ -3738,7 +3739,7 @@ def TestCreateTensorOp : TEST_Op<"create_tensor_op", def TestCreateMemrefOp : TEST_Op<"create_memref_op"> { let arguments = (ins); - let results = (outs Arg<TestMemrefType>:$output); + let results = (outs Arg<Bufferization_BufferLikeTypeInterface>:$output); } //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Pass/TestRemarksPass.cpp b/mlir/test/lib/Pass/TestRemarksPass.cpp index 3b25686..5ca2d1a 100644 --- a/mlir/test/lib/Pass/TestRemarksPass.cpp +++ b/mlir/test/lib/Pass/TestRemarksPass.cpp @@ -43,7 +43,12 @@ public: << remark::add("This is a test missed remark") << remark::reason("because we are testing the remark pipeline") << remark::suggest("try using the remark pipeline feature"); - + mlir::remark::passed( + loc, + remark::RemarkOpts::name("test-remark").category("category-1-passed")) + << remark::add("This is a test passed remark (should be dropped)") + << remark::reason("because we are testing the remark pipeline") + << remark::suggest("try using the remark pipeline feature"); mlir::remark::passed( loc, remark::RemarkOpts::name("test-remark").category("category-1-passed")) diff --git a/mlir/test/mlir-tblgen/cpp-class-comments.td b/mlir/test/mlir-tblgen/cpp-class-comments.td index a896888..9dcf975 100644 --- a/mlir/test/mlir-tblgen/cpp-class-comments.td +++ b/mlir/test/mlir-tblgen/cpp-class-comments.td @@ -96,17 +96,14 @@ def EncodingTrait : AttrInterface<"EncodingTrait"> { }]; let methods = [ ]; -// ATTR-INTERFACE: namespace mlir -// ATTR-INTERFACE-NEXT: namespace a -// ATTR-INTERFACE-NEXT: namespace traits +// ATTR-INTERFACE: namespace mlir::a::traits { // ATTR-INTERFACE-NEXT: /// Common trait for all layouts. // ATTR-INTERFACE-NEXT: class EncodingTrait; } def SimpleEncodingTrait : AttrInterface<"SimpleEncodingTrait"> { let cppNamespace = "a::traits"; -// ATTR-INTERFACE: namespace a { -// ATTR-INTERFACE-NEXT: namespace traits { +// ATTR-INTERFACE: namespace a::traits { // ATTR-INTERFACE-NEXT: class SimpleEncodingTrait; } @@ -116,8 +113,7 @@ def SimpleOpInterface : OpInterface<"SimpleOpInterface"> { Simple Op Interface description }]; -// OP-INTERFACE: namespace a { -// OP-INTERFACE-NEXT: namespace traits { +// OP-INTERFACE: namespace a::traits { // OP-INTERFACE-NEXT: /// Simple Op Interface description // OP-INTERFACE-NEXT: class SimpleOpInterface; } diff --git a/mlir/test/mlir-tblgen/dialect.td b/mlir/test/mlir-tblgen/dialect.td index f35ce34..9b45495 100644 --- a/mlir/test/mlir-tblgen/dialect.td +++ b/mlir/test/mlir-tblgen/dialect.td @@ -62,9 +62,14 @@ def E_SpecialNSOp : Op<E_Dialect, "special_ns_op", []> { // DEF: ::E::SPECIAL_NS::SpecialNSOp definitions // DECL-LABEL: GET_OP_CLASSES +// DECL: namespace a { // DECL: a::SomeOp declarations +// DECL: namespace BNS { // DECL: BNS::SomeOp declarations +// DECL: namespace C::CC { // DECL: ::C::CC::SomeOp declarations // DECL: DSomeOp declarations +// DECL: namespace ENS { // DECL: ENS::SomeOp declarations +// DECL: namespace E::SPECIAL_NS { // DECL: ::E::SPECIAL_NS::SpecialNSOp declarations |