diff options
Diffstat (limited to 'mlir/test')
37 files changed, 332 insertions, 807 deletions
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index ac8b44f5..89568e7 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -68,6 +68,7 @@ endif() llvm_canonicalize_cmake_booleans( LLVM_BUILD_EXAMPLES LLVM_HAS_NVPTX_TARGET + LLVM_INCLUDE_SPIRV_TOOLS_TESTS MLIR_ENABLE_BINDINGS_PYTHON MLIR_ENABLE_CUDA_RUNNER MLIR_ENABLE_ROCM_CONVERSIONS @@ -217,6 +218,11 @@ if(MLIR_ENABLE_BINDINGS_PYTHON) ) endif() +if (LLVM_INCLUDE_SPIRV_TOOLS_TESTS) + list(APPEND MLIR_TEST_DEPENDS spirv-as) + list(APPEND MLIR_TEST_DEPENDS spirv-val) +endif() + # This target can be used to just build the dependencies # for the check-mlir target without executing the tests. # This is useful for bots when splitting the build step diff --git a/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules.mlir b/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules.mlir index 00bbd1c..96ad107 100644 --- a/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules.mlir +++ b/mlir/test/Conversion/ConvertToSPIRV/convert-gpu-modules.mlir @@ -85,11 +85,10 @@ module attributes { // CHECK: spirv.Load "StorageBuffer" %val = memref.load %arg0[%idx0] : memref<2xi32> // CHECK: spirv.CompositeInsert - %vec = vector.insertelement %val, %vec0[%idx0 : index] : vector<2xi32> + %vec = vector.insert %val, %vec0[%idx0] : i32 into vector<2xi32> // CHECK: spirv.VectorShuffle %shuffle = vector.shuffle %vec, %vec[3, 2, 1, 0] : vector<2xi32>, vector<2xi32> - // CHECK: spirv.CompositeExtract - %res = vector.extractelement %shuffle[%idx0 : index] : vector<4xi32> + %res = vector.extract %shuffle[%idx0] : i32 from vector<4xi32> // CHECK: spirv.AccessChain // CHECK: spirv.Store "StorageBuffer" memref.store %res, %arg1[%idx0]: memref<4xi32> @@ -102,9 +101,9 @@ module attributes { // CHECK-SAME: %{{.*}}: memref<2xi32>, %{{.*}}: memref<4xi32> // CHECK: arith.constant // CHECK: memref.load - // CHECK: vector.insertelement + // CHECK: vector.insert // CHECK: vector.shuffle - // CHECK: vector.extractelement 
+ // CHECK: vector.extract // CHECK: memref.store // CHECK: gpu.return } diff --git a/mlir/test/Conversion/ConvertToSPIRV/vector.mlir b/mlir/test/Conversion/ConvertToSPIRV/vector.mlir index fb14feb..eb9feaa 100644 --- a/mlir/test/Conversion/ConvertToSPIRV/vector.mlir +++ b/mlir/test/Conversion/ConvertToSPIRV/vector.mlir @@ -51,108 +51,6 @@ func.func @insert_size1_vector(%arg0 : vector<1xf32>, %arg1: f32) -> vector<1xf3 // ----- -// CHECK-LABEL: @extract_element -// CHECK-SAME: %[[V:.*]]: vector<4xf32>, %[[ID:.*]]: i32 -// CHECK: spirv.VectorExtractDynamic %[[V]][%[[ID]]] : vector<4xf32>, i32 -func.func @extract_element(%arg0 : vector<4xf32>, %id : i32) -> f32 { - %0 = vector.extractelement %arg0[%id : i32] : vector<4xf32> - return %0: f32 -} - -// ----- - -// CHECK-LABEL: @extract_element_cst -// CHECK-SAME: %[[V:.*]]: vector<4xf32> -// CHECK: spirv.CompositeExtract %[[V]][1 : i32] : vector<4xf32> -func.func @extract_element_cst(%arg0 : vector<4xf32>) -> f32 { - %idx = arith.constant 1 : i32 - %0 = vector.extractelement %arg0[%idx : i32] : vector<4xf32> - return %0: f32 -} - -// ----- - -// CHECK-LABEL: @extract_element_index -func.func @extract_element_index(%arg0 : vector<4xf32>, %id : index) -> f32 { - // CHECK: spirv.VectorExtractDynamic - %0 = vector.extractelement %arg0[%id : index] : vector<4xf32> - return %0: f32 -} - -// ----- - -// CHECK-LABEL: @extract_element_size1_vector -// CHECK-SAME:(%[[S:.+]]: f32, -func.func @extract_element_size1_vector(%arg0 : f32, %i: index) -> f32 { - %bcast = vector.broadcast %arg0 : f32 to vector<1xf32> - %0 = vector.extractelement %bcast[%i : index] : vector<1xf32> - // CHECK: spirv.ReturnValue %[[S]] - return %0: f32 -} - -// ----- - -// CHECK-LABEL: @extract_element_0d_vector -// CHECK-SAME: (%[[S:.+]]: f32) -func.func @extract_element_0d_vector(%arg0 : f32) -> f32 { - %bcast = vector.broadcast %arg0 : f32 to vector<f32> - %0 = vector.extractelement %bcast[] : vector<f32> - // CHECK: spirv.ReturnValue %[[S]] - return %0: 
f32 -} - -// ----- - -// CHECK-LABEL: @insert_element -// CHECK-SAME: %[[VAL:.*]]: f32, %[[V:.*]]: vector<4xf32>, %[[ID:.*]]: i32 -// CHECK: spirv.VectorInsertDynamic %[[VAL]], %[[V]][%[[ID]]] : vector<4xf32>, i32 -func.func @insert_element(%val: f32, %arg0 : vector<4xf32>, %id : i32) -> vector<4xf32> { - %0 = vector.insertelement %val, %arg0[%id : i32] : vector<4xf32> - return %0: vector<4xf32> -} - -// ----- - -// CHECK-LABEL: @insert_element_cst -// CHECK-SAME: %[[VAL:.*]]: f32, %[[V:.*]]: vector<4xf32> -// CHECK: spirv.CompositeInsert %[[VAL]], %[[V]][2 : i32] : f32 into vector<4xf32> -func.func @insert_element_cst(%val: f32, %arg0 : vector<4xf32>) -> vector<4xf32> { - %idx = arith.constant 2 : i32 - %0 = vector.insertelement %val, %arg0[%idx : i32] : vector<4xf32> - return %0: vector<4xf32> -} - -// ----- - -// CHECK-LABEL: @insert_element_index -func.func @insert_element_index(%val: f32, %arg0 : vector<4xf32>, %id : index) -> vector<4xf32> { - // CHECK: spirv.VectorInsertDynamic - %0 = vector.insertelement %val, %arg0[%id : index] : vector<4xf32> - return %0: vector<4xf32> -} - -// ----- - -// CHECK-LABEL: @insert_element_size1_vector -// CHECK-SAME: (%[[S:[a-z0-9]+]]: f32 -func.func @insert_element_size1_vector(%scalar: f32, %vector : vector<1xf32>, %i: index) -> vector<1xf32> { - %0 = vector.insertelement %scalar, %vector[%i : index] : vector<1xf32> - // CHECK: spirv.ReturnValue %[[S]] - return %0: vector<1xf32> -} - -// ----- - -// CHECK-LABEL: @insert_element_0d_vector -// CHECK-SAME: (%[[S:[a-z0-9]+]]: f32 -func.func @insert_element_0d_vector(%scalar: f32, %vector : vector<f32>) -> vector<f32> { - %0 = vector.insertelement %scalar, %vector[] : vector<f32> - // CHECK: spirv.ReturnValue %[[S]] - return %0: vector<f32> -} - -// ----- - // CHECK-LABEL: @insert_size1_vector // CHECK-SAME: %[[SUB:.*]]: f32, %[[FULL:.*]]: vector<3xf32> // CHECK: %[[RET:.*]] = spirv.CompositeInsert %[[SUB]], %[[FULL]][2 : i32] : f32 into vector<3xf32> diff --git 
a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-alloc.mlir b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-alloc.mlir new file mode 100644 index 0000000..e391a89 --- /dev/null +++ b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc-alloc.mlir @@ -0,0 +1,72 @@ +// RUN: mlir-opt -convert-memref-to-emitc="lower-to-cpp=true" %s -split-input-file | FileCheck %s --check-prefix=CPP +// RUN: mlir-opt -convert-memref-to-emitc="lower-to-cpp=false" %s -split-input-file | FileCheck %s --check-prefix=NOCPP + +func.func @alloc() { + %alloc = memref.alloc() : memref<999xi32> + return +} + +// CPP: module { +// CPP-NEXT: emitc.include <"cstdlib"> +// CPP-LABEL: alloc() +// CPP-NEXT: %[[ALLOC:.*]] = emitc.call_opaque "sizeof"() {args = [i32]} : () -> !emitc.size_t +// CPP-NEXT: %[[ALLOC_SIZE:.*]] = "emitc.constant"() <{value = 999 : index}> : () -> index +// CPP-NEXT: %[[ALLOC_TOTAL_SIZE:.*]] = emitc.mul %[[ALLOC]], %[[ALLOC_SIZE]] : (!emitc.size_t, index) -> !emitc.size_t +// CPP-NEXT: %[[ALLOC_PTR:.*]] = emitc.call_opaque "malloc"(%[[ALLOC_TOTAL_SIZE]]) : (!emitc.size_t) -> !emitc.ptr<!emitc.opaque<"void">> +// CPP-NEXT: %[[ALLOC_CAST:.*]] = emitc.cast %[[ALLOC_PTR]] : !emitc.ptr<!emitc.opaque<"void">> to !emitc.ptr<i32> +// CPP-NEXT: return + +// NOCPP: module { +// NOCPP-NEXT: emitc.include <"stdlib.h"> +// NOCPP-LABEL: alloc() +// NOCPP-NEXT: %[[ALLOC:.*]] = emitc.call_opaque "sizeof"() {args = [i32]} : () -> !emitc.size_t +// NOCPP-NEXT: %[[ALLOC_SIZE:.*]] = "emitc.constant"() <{value = 999 : index}> : () -> index +// NOCPP-NEXT: %[[ALLOC_TOTAL_SIZE:.*]] = emitc.mul %[[ALLOC]], %[[ALLOC_SIZE]] : (!emitc.size_t, index) -> !emitc.size_t +// NOCPP-NEXT: %[[ALLOC_PTR:.*]] = emitc.call_opaque "malloc"(%[[ALLOC_TOTAL_SIZE]]) : (!emitc.size_t) -> !emitc.ptr<!emitc.opaque<"void">> +// NOCPP-NEXT: %[[ALLOC_CAST:.*]] = emitc.cast %[[ALLOC_PTR]] : !emitc.ptr<!emitc.opaque<"void">> to !emitc.ptr<i32> +// NOCPP-NEXT: return + +func.func @alloc_aligned() { + %alloc = 
memref.alloc() {alignment = 64 : i64} : memref<999xf32> + return +} + +// CPP-LABEL: alloc_aligned +// CPP-NEXT: %[[ALLOC:.*]] = emitc.call_opaque "sizeof"() {args = [f32]} : () -> !emitc.size_t +// CPP-NEXT: %[[ALLOC_SIZE:.*]] = "emitc.constant"() <{value = 999 : index}> : () -> index +// CPP-NEXT: %[[ALLOC_TOTAL_SIZE:.*]] = emitc.mul %[[ALLOC]], %[[ALLOC_SIZE]] : (!emitc.size_t, index) -> !emitc.size_t +// CPP-NEXT: %[[ALIGNMENT:.*]] = "emitc.constant"() <{value = 64 : index}> : () -> !emitc.size_t +// CPP-NEXT: %[[ALLOC_PTR:.*]] = emitc.call_opaque "aligned_alloc"(%[[ALIGNMENT]], %[[ALLOC_TOTAL_SIZE]]) : (!emitc.size_t, !emitc.size_t) -> !emitc.ptr<!emitc.opaque<"void">> +// CPP-NEXT: %[[ALLOC_CAST:.*]] = emitc.cast %[[ALLOC_PTR]] : !emitc.ptr<!emitc.opaque<"void">> to !emitc.ptr<f32> +// CPP-NEXT: return + +// NOCPP-LABEL: alloc_aligned +// NOCPP-NEXT: %[[ALLOC:.*]] = emitc.call_opaque "sizeof"() {args = [f32]} : () -> !emitc.size_t +// NOCPP-NEXT: %[[ALLOC_SIZE:.*]] = "emitc.constant"() <{value = 999 : index}> : () -> index +// NOCPP-NEXT: %[[ALLOC_TOTAL_SIZE:.*]] = emitc.mul %[[ALLOC]], %[[ALLOC_SIZE]] : (!emitc.size_t, index) -> !emitc.size_t +// NOCPP-NEXT: %[[ALIGNMENT:.*]] = "emitc.constant"() <{value = 64 : index}> : () -> !emitc.size_t +// NOCPP-NEXT: %[[ALLOC_PTR:.*]] = emitc.call_opaque "aligned_alloc"(%[[ALIGNMENT]], %[[ALLOC_TOTAL_SIZE]]) : (!emitc.size_t, !emitc.size_t) -> !emitc.ptr<!emitc.opaque<"void">> +// NOCPP-NEXT: %[[ALLOC_CAST:.*]] = emitc.cast %[[ALLOC_PTR]] : !emitc.ptr<!emitc.opaque<"void">> to !emitc.ptr<f32> +// NOCPP-NEXT: return + +func.func @allocating_multi() { + %alloc_5 = memref.alloc() : memref<7x999xi32> + return +} + +// CPP-LABEL: allocating_multi +// CPP-NEXT: %[[ALLOC:.*]] = emitc.call_opaque "sizeof"() {args = [i32]} : () -> !emitc.size_t +// CPP-NEXT: %[[ALLOC_SIZE:.*]] = "emitc.constant"() <{value = 6993 : index}> : () -> index +// CPP-NEXT: %[[ALLOC_TOTAL_SIZE:.*]] = emitc.mul %[[ALLOC]], %[[ALLOC_SIZE]] : 
(!emitc.size_t, index) -> !emitc.size_t +// CPP-NEXT: %[[ALLOC_PTR:.*]] = emitc.call_opaque "malloc"(%[[ALLOC_TOTAL_SIZE]]) : (!emitc.size_t) -> !emitc.ptr<!emitc.opaque<"void">> +// CPP-NEXT: %[[ALLOC_CAST:.*]] = emitc.cast %[[ALLOC_PTR]] : !emitc.ptr<!emitc.opaque<"void">> to !emitc.ptr<i32> +// CPP-NEXT: return + +// NOCPP-LABEL: allocating_multi +// NOCPP-NEXT: %[[ALLOC:.*]] = emitc.call_opaque "sizeof"() {args = [i32]} : () -> !emitc.size_t +// NOCPP-NEXT: %[[ALLOC_SIZE:.*]] = "emitc.constant"() <{value = 6993 : index}> : () -> index +// NOCPP-NEXT: %[[ALLOC_TOTAL_SIZE:.*]] = emitc.mul %[[ALLOC]], %[[ALLOC_SIZE]] : (!emitc.size_t, index) -> !emitc.size_t +// NOCPP-NEXT: %[[ALLOC_PTR:.*]] = emitc.call_opaque "malloc"(%[[ALLOC_TOTAL_SIZE]]) : (!emitc.size_t) -> !emitc.ptr<!emitc.opaque<"void">> +// NOCPP-NEXT: %[[ALLOC_CAST:.*]] = emitc.cast %[[ALLOC_PTR]] : !emitc.ptr<!emitc.opaque<"void">> to !emitc.ptr<i32> +// NOCPP-NEXT: return + diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm-interface.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm-interface.mlir index 8c135d5..31e17fb 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm-interface.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm-interface.mlir @@ -274,73 +274,6 @@ func.func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf3 // ----- //===----------------------------------------------------------------------===// -// vector.extractelement -//===----------------------------------------------------------------------===// - -func.func @extractelement_from_vec_0d_f32(%arg0: vector<f32>) -> f32 { - %1 = vector.extractelement %arg0[] : vector<f32> - return %1 : f32 -} -// CHECK-LABEL: @extractelement_from_vec_0d_f32 -// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: llvm.extractelement %{{.*}}[%[[C0]] : {{.*}}] : vector<1xf32> - -// ----- - -func.func @extractelement_from_vec_1d_f32_idx_as_i32(%arg0: vector<16xf32>) -> f32 { - %0 = 
arith.constant 15 : i32 - %1 = vector.extractelement %arg0[%0 : i32]: vector<16xf32> - return %1 : f32 -} -// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_i32( -// CHECK-SAME: %[[A:.*]]: vector<16xf32>) -// CHECK: %[[C:.*]] = arith.constant 15 : i32 -// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[C]] : i32] : vector<16xf32> -// CHECK: return %[[X]] : f32 - -// ----- - -func.func @extractelement_from_vec_1d_f32_idx_as_i32_scalable(%arg0: vector<[16]xf32>) -> f32 { - %0 = arith.constant 15 : i32 - %1 = vector.extractelement %arg0[%0 : i32]: vector<[16]xf32> - return %1 : f32 -} -// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_i32_scalable( -// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>) -// CHECK: %[[C:.*]] = arith.constant 15 : i32 -// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[C]] : i32] : vector<[16]xf32> -// CHECK: return %[[X]] : f32 - -// ----- -func.func @extractelement_from_vec_1d_f32_idx_as_index(%arg0: vector<16xf32>) -> f32 { - %0 = arith.constant 15 : index - %1 = vector.extractelement %arg0[%0 : index]: vector<16xf32> - return %1 : f32 -} -// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_index( -// CHECK-SAME: %[[A:.*]]: vector<16xf32>) -// CHECK: %[[C:.*]] = arith.constant 15 : index -// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64 -// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[I]] : i64] : vector<16xf32> -// CHECK: return %[[X]] : f32 - -// ----- - -func.func @extractelement_from_vec_1d_f32_idx_as_index_scalable(%arg0: vector<[16]xf32>) -> f32 { - %0 = arith.constant 15 : index - %1 = vector.extractelement %arg0[%0 : index]: vector<[16]xf32> - return %1 : f32 -} -// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_index_scalable( -// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>) -// CHECK: %[[C:.*]] = arith.constant 15 : index -// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64 -// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[I]] : i64] : vector<[16]xf32> -// 
CHECK: return %[[X]] : f32 - -// ----- - -//===----------------------------------------------------------------------===// // vector.extract //===----------------------------------------------------------------------===// @@ -592,81 +525,6 @@ func.func @extract_scalar_from_vec_2d_f32_dynamic_idxs_compile_time_const(%arg : // ----- //===----------------------------------------------------------------------===// -// vector.insertelement -//===----------------------------------------------------------------------===// - -func.func @insertelement_into_vec_0d_f32(%arg0: f32, %arg1: vector<f32>) -> vector<f32> { - %1 = vector.insertelement %arg0, %arg1[] : vector<f32> - return %1 : vector<f32> -} -// CHECK-LABEL: @insertelement_into_vec_0d_f32 -// CHECK-SAME: %[[A:.*]]: f32, -// CHECK: %[[B:.*]] = builtin.unrealized_conversion_cast %{{.*}} : -// CHECK: vector<f32> to vector<1xf32> -// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C0]] : {{.*}}] : vector<1xf32> - -// ----- - -func.func @insertelement_into_vec_1d_f32_idx_as_i32(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { - %0 = arith.constant 3 : i32 - %1 = vector.insertelement %arg0, %arg1[%0 : i32] : vector<4xf32> - return %1 : vector<4xf32> -} -// CHECK-LABEL: @insertelement_into_vec_1d_f32_idx_as_i32( -// CHECK-SAME: %[[A:.*]]: f32, -// CHECK-SAME: %[[B:.*]]: vector<4xf32>) -// CHECK: %[[C:.*]] = arith.constant 3 : i32 -// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C]] : i32] : vector<4xf32> -// CHECK: return %[[X]] : vector<4xf32> - -// ----- - -func.func @insertelement_into_vec_1d_f32_idx_as_i32_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> { - %0 = arith.constant 3 : i32 - %1 = vector.insertelement %arg0, %arg1[%0 : i32] : vector<[4]xf32> - return %1 : vector<[4]xf32> -} -// CHECK-LABEL: @insertelement_into_vec_1d_f32_idx_as_i32_scalable( -// CHECK-SAME: %[[A:.*]]: f32, -// CHECK-SAME: %[[B:.*]]: 
vector<[4]xf32>) -// CHECK: %[[C:.*]] = arith.constant 3 : i32 -// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C]] : i32] : vector<[4]xf32> -// CHECK: return %[[X]] : vector<[4]xf32> - -// ----- - -func.func @insertelement_into_vec_1d_f32_scalable_idx_as_index(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> { - %0 = arith.constant 3 : index - %1 = vector.insertelement %arg0, %arg1[%0 : index] : vector<4xf32> - return %1 : vector<4xf32> -} -// CHECK-LABEL: @insertelement_into_vec_1d_f32_scalable_idx_as_index( -// CHECK-SAME: %[[A:.*]]: f32, -// CHECK-SAME: %[[B:.*]]: vector<4xf32>) -// CHECK: %[[C:.*]] = arith.constant 3 : index -// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64 -// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[I]] : i64] : vector<4xf32> -// CHECK: return %[[X]] : vector<4xf32> - -// ----- - -func.func @insertelement_into_vec_1d_f32_scalable_idx_as_index_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> { - %0 = arith.constant 3 : index - %1 = vector.insertelement %arg0, %arg1[%0 : index] : vector<[4]xf32> - return %1 : vector<[4]xf32> -} -// CHECK-LABEL: @insertelement_into_vec_1d_f32_scalable_idx_as_index_scalable( -// CHECK-SAME: %[[A:.*]]: f32, -// CHECK-SAME: %[[B:.*]]: vector<[4]xf32>) -// CHECK: %[[C:.*]] = arith.constant 3 : index -// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64 -// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[I]] : i64] : vector<[4]xf32> -// CHECK: return %[[X]] : vector<[4]xf32> - -// ----- - -//===----------------------------------------------------------------------===// // vector.insert //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir index f43a41a..8918f91 100644 --- a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir +++ 
b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir @@ -400,67 +400,6 @@ func.func @insert_dynamic_cst(%val: f32, %arg0 : vector<4xf32>) -> vector<4xf32> // ----- -// CHECK-LABEL: @extract_element -// CHECK-SAME: %[[V:.*]]: vector<4xf32>, %[[ID:.*]]: i32 -// CHECK: spirv.VectorExtractDynamic %[[V]][%[[ID]]] : vector<4xf32>, i32 -func.func @extract_element(%arg0 : vector<4xf32>, %id : i32) -> f32 { - %0 = vector.extractelement %arg0[%id : i32] : vector<4xf32> - return %0: f32 -} - -// ----- - -// CHECK-LABEL: @extract_element_cst -// CHECK-SAME: %[[V:.*]]: vector<4xf32> -// CHECK: spirv.CompositeExtract %[[V]][1 : i32] : vector<4xf32> -func.func @extract_element_cst(%arg0 : vector<4xf32>) -> f32 { - %idx = arith.constant 1 : i32 - %0 = vector.extractelement %arg0[%idx : i32] : vector<4xf32> - return %0: f32 -} - -// ----- - -// CHECK-LABEL: @extract_element_index -func.func @extract_element_index(%arg0 : vector<4xf32>, %id : index) -> f32 { - // CHECK: spirv.VectorExtractDynamic - %0 = vector.extractelement %arg0[%id : index] : vector<4xf32> - return %0: f32 -} - -// ----- - -// CHECK-LABEL: @extract_element_size5_vector -func.func @extract_element_size5_vector(%arg0 : vector<5xf32>, %id : i32) -> f32 { - // CHECK: vector.extractelement - %0 = vector.extractelement %arg0[%id : i32] : vector<5xf32> - return %0: f32 -} - -// ----- - -// CHECK-LABEL: @extract_element_size1_vector -// CHECK-SAME: (%[[S:.+]]: f32 -func.func @extract_element_size1_vector(%arg0 : f32, %i: index) -> f32 { - %bcast = vector.broadcast %arg0 : f32 to vector<1xf32> - %0 = vector.extractelement %bcast[%i : index] : vector<1xf32> - // CHECK: return %[[S]] - return %0: f32 -} - -// ----- - -// CHECK-LABEL: @extract_element_0d_vector -// CHECK-SAME: (%[[S:.+]]: f32) -func.func @extract_element_0d_vector(%arg0 : f32) -> f32 { - %bcast = vector.broadcast %arg0 : f32 to vector<f32> - %0 = vector.extractelement %bcast[] : vector<f32> - // CHECK: return %[[S]] - return %0: f32 -} - -// ----- - // 
CHECK-LABEL: @extract_strided_slice // CHECK-SAME: %[[ARG:.+]]: vector<4xf32> // CHECK: spirv.VectorShuffle [1 : i32, 2 : i32] %[[ARG]], %[[ARG]] : vector<4xf32>, vector<4xf32> -> vector<2xf32> @@ -473,67 +412,6 @@ func.func @extract_strided_slice(%arg0: vector<4xf32>) -> (vector<2xf32>, vector // ----- -// CHECK-LABEL: @insert_element -// CHECK-SAME: %[[VAL:.*]]: f32, %[[V:.*]]: vector<4xf32>, %[[ID:.*]]: i32 -// CHECK: spirv.VectorInsertDynamic %[[VAL]], %[[V]][%[[ID]]] : vector<4xf32>, i32 -func.func @insert_element(%val: f32, %arg0 : vector<4xf32>, %id : i32) -> vector<4xf32> { - %0 = vector.insertelement %val, %arg0[%id : i32] : vector<4xf32> - return %0: vector<4xf32> -} - -// ----- - -// CHECK-LABEL: @insert_element_cst -// CHECK-SAME: %[[VAL:.*]]: f32, %[[V:.*]]: vector<4xf32> -// CHECK: spirv.CompositeInsert %[[VAL]], %[[V]][2 : i32] : f32 into vector<4xf32> -func.func @insert_element_cst(%val: f32, %arg0 : vector<4xf32>) -> vector<4xf32> { - %idx = arith.constant 2 : i32 - %0 = vector.insertelement %val, %arg0[%idx : i32] : vector<4xf32> - return %0: vector<4xf32> -} - -// ----- - -// CHECK-LABEL: @insert_element_index -func.func @insert_element_index(%val: f32, %arg0 : vector<4xf32>, %id : index) -> vector<4xf32> { - // CHECK: spirv.VectorInsertDynamic - %0 = vector.insertelement %val, %arg0[%id : index] : vector<4xf32> - return %0: vector<4xf32> -} - -// ----- - -// CHECK-LABEL: @insert_element_size5_vector -func.func @insert_element_size5_vector(%val: f32, %arg0 : vector<5xf32>, %id : i32) -> vector<5xf32> { - // CHECK: vector.insertelement - %0 = vector.insertelement %val, %arg0[%id : i32] : vector<5xf32> - return %0 : vector<5xf32> -} - -// ----- - -// CHECK-LABEL: @insert_element_size1_vector -// CHECK-SAME: (%[[S:[a-z0-9]+]]: f32 -func.func @insert_element_size1_vector(%scalar: f32, %vector : vector<1xf32>, %i: index) -> vector<1xf32> { - %0 = vector.insertelement %scalar, %vector[%i : index] : vector<1xf32> - // CHECK: %[[V:.+]] = 
builtin.unrealized_conversion_cast %arg0 : f32 to vector<1xf32> - // CHECK: return %[[V]] - return %0: vector<1xf32> -} - -// ----- - -// CHECK-LABEL: @insert_element_0d_vector -// CHECK-SAME: (%[[S:[a-z0-9]+]]: f32 -func.func @insert_element_0d_vector(%scalar: f32, %vector : vector<f32>) -> vector<f32> { - %0 = vector.insertelement %scalar, %vector[] : vector<f32> - // CHECK: %[[V:.+]] = builtin.unrealized_conversion_cast %arg0 : f32 to vector<f32> - // CHECK: return %[[V]] - return %0: vector<f32> -} - -// ----- - // CHECK-LABEL: @insert_strided_slice // CHECK-SAME: %[[PART:.+]]: vector<2xf32>, %[[ALL:.+]]: vector<4xf32> // CHECK: spirv.VectorShuffle [0 : i32, 4 : i32, 5 : i32, 3 : i32] %[[ALL]], %[[PART]] : vector<4xf32>, vector<2xf32> -> vector<4xf32> diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir new file mode 100644 index 0000000..e2ab876 --- /dev/null +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-non-module-bufferize.mlir @@ -0,0 +1,33 @@ +// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline='builtin.module(test.symbol_scope_isolated(test-one-shot-module-bufferize))' -split-input-file | FileCheck %s + +"test.symbol_scope_isolated"() ({ + // CHECK-LABEL: func @inner_func( + // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 + func.func @inner_func(%t: tensor<?xf32>) -> (tensor<?xf32>, f32) { + // CHECK-NOT: copy + %f = arith.constant 1.0 : f32 + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + // CHECK: memref.store %{{.*}}, %[[arg0]] + %0 = tensor.insert %f into %t[%c0] : tensor<?xf32> + // CHECK: %[[load:.*]] = memref.load %[[arg0]] + %1 = tensor.extract %0[%c1] : tensor<?xf32> + // CHECK: return %[[arg0]], %[[load]] : memref<?xf32{{.*}}>, f32 + return %0, %1 : tensor<?xf32>, f32 + } + + // CHECK-LABEL: func @call_func_with_non_tensor_return( + // CHECK-SAME: %[[arg0:.*]]: memref<?xf32 + func.func 
@call_func_with_non_tensor_return( + %t0: tensor<?xf32> {bufferization.writable = true}) -> (f32, tensor<?xf32>) { + // CHECK-NOT: alloc + // CHECK-NOT: copy + // CHECK: %[[call:.*]]:2 = call @inner_func(%[[arg0]]) + %0, %1 = call @inner_func(%t0) : (tensor<?xf32>) -> (tensor<?xf32>, f32) + // CHECK: return %[[call]]#1, %[[call]]#0 : f32, memref<?xf32,{{.*}}> + return %1, %0 : f32, tensor<?xf32> + } + "test.finish" () : () -> () +}) : () -> () + + diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 9cbb56e4..39a7b1b 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -1387,42 +1387,43 @@ func.func @recursive_effect(%arg : tensor<1xf32>) { // CHECK-LABEL: @recursive_effect // CHECK: linalg.map +// ----- + //===----------------------------------------------------------------------===// // linalg.pack //===----------------------------------------------------------------------===// // CHECK-LABEL: func @fold_pack_constant_splat // CHECK-NOT: linalg.pack -// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32> -func.func @fold_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { +// CHECK: arith.constant dense<1.000000e-01> : tensor<4x8x8x32xf32> +func.func @fold_pack_constant_splat(%dest : tensor<4x8x8x32xf32>) -> tensor<4x8x8x32xf32> { %cst = arith.constant dense<1.000000e-01> : tensor<64x128xf32> %0 = linalg.pack %cst outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<8x16x8x32xf32> - return %0 : tensor<8x16x8x32xf32> + inner_tiles = [8, 32] into %dest : tensor<64x128xf32> -> tensor<4x8x8x32xf32> + return %0 : tensor<4x8x8x32xf32> } // ----- // CHECK-LABEL: func @fold_padding_value_pack_constant_splat // CHECK-NOT: linalg.pack -// CHECK: arith.constant dense<1.000000e-01> : tensor<8x16x8x32xf32> -func.func @fold_padding_value_pack_constant_splat(%dest : 
tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { +// CHECK: arith.constant dense<1.000000e-01> : tensor<4x8x8x32xf32> +func.func @fold_padding_value_pack_constant_splat(%dest : tensor<4x8x8x32xf32>) -> tensor<4x8x8x32xf32> { %pad = arith.constant 1.000000e-01 : f32 %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> %0 = linalg.pack %cst padding_value(%pad : f32) outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] - inner_tiles = [8, 32] into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> - return %0 : tensor<8x16x8x32xf32> + inner_tiles = [8, 32] into %dest : tensor<63x127xf32> -> tensor<4x8x8x32xf32> + return %0 : tensor<4x8x8x32xf32> } - // ----- // CHECK-LABEL: func @nofold_padding_value_pack_constant_splat // CHECK: arith.constant dense<1.000000e-01> : tensor<63x127xf32> // CHECK: linalg.pack -func.func @nofold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32>) -> tensor<8x16x8x32xf32> { +func.func @nofold_padding_value_pack_constant_splat(%dest : tensor<4x8x8x32xf32>) -> tensor<4x8x8x32xf32> { %pad = arith.constant 0.0 : f32 %cst = arith.constant dense<1.000000e-01> : tensor<63x127xf32> %0 = linalg.pack %cst @@ -1430,8 +1431,8 @@ func.func @nofold_padding_value_pack_constant_splat(%dest : tensor<8x16x8x32xf32 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32] - into %dest : tensor<63x127xf32> -> tensor<8x16x8x32xf32> - return %0 : tensor<8x16x8x32xf32> + into %dest : tensor<63x127xf32> -> tensor<4x8x8x32xf32> + return %0 : tensor<4x8x8x32xf32> } // ----- diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir index 6fc8d9f..cc26fa4 100644 --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -1295,24 +1295,6 @@ func.func @no_bubble_up_pack_expanded_padding_through_expand_cannot_reassociate( // ----- -func.func 
@no_bubble_up_pack_extending_dimension_through_expand_cannot_reassociate(%arg0: tensor<32x64xf32>) -> tensor<8x4x16x8xf32> { - %empty = tensor.empty() : tensor<8x4x16x8xf32> - %expanded = tensor.expand_shape %arg0 [[0], [1, 2]] output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32> - %pack = linalg.pack %expanded inner_dims_pos = [0] inner_tiles = [8] into %empty : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32> - return %pack : tensor<8x4x16x8xf32> -} -// CHECK-LABEL: func.func @no_bubble_up_pack_extending_dimension_through_expand_cannot_reassociate( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x4x16x8xf32> -// CHECK: %[[EXPANDED:.+]] = tensor.expand_shape %[[ARG0]] {{\[}}[0], [1, 2]] -// CHECK-SAME: output_shape [32, 4, 16] : tensor<32x64xf32> into tensor<32x4x16xf32> -// CHECK: %[[PACK:.+]] = linalg.pack %[[EXPANDED]] -// CHECK-SAME: inner_dims_pos = [0] inner_tiles = [8] into %[[EMPTY]] -// CHECK-SAME: : tensor<32x4x16xf32> -> tensor<8x4x16x8xf32> -// CHECK: return %[[PACK]] : tensor<8x4x16x8xf32> - -// ----- - func.func @push_down_unpack_through_expand(%5: tensor<?x32x8x8xf32>, %dim: index, %sz0: index) -> tensor<?x256x256xf32> { %6 = tensor.empty(%dim) : tensor<?x256xf32> %unpack = linalg.unpack %5 outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %6 : tensor<?x32x8x8xf32> -> tensor<?x256xf32> diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index da1dfc7..40bf4d1 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -1760,6 +1760,7 @@ func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf } // ----- + func.func @pack_mismatch_inner_tile_size_and_output_shape( %input : tensor<?x?xf32>, %output : tensor<?x?x8x8xf32>) -> tensor<?x?x8x8xf32> { // expected-error@+1 {{mismatch in inner tile sizes specified and shaped of tiled dimension in the packed type}} @@ -1824,27 
+1825,47 @@ func.func @unpack_invalid_outer_dims_perm(%source: tensor<128x256xf32>, %dest: t // ----- +func.func @pack_with_artificial_padding(%input: tensor<9xf32>, %output: tensor<3x8xf32>) -> tensor<3x8xf32> { + %cst = arith.constant 0.0 : f32 + // expected-error@+1 {{expected 'tensor<2x8xf32>' for the packed domain value, got 'tensor<3x8xf32>'}} + %0 = linalg.pack %input padding_value(%cst : f32) inner_dims_pos = [0] + inner_tiles = [8] into %output + : tensor<9xf32> -> tensor<3x8xf32> + return %0 : tensor<3x8xf32> +} + +// ----- + // The outer dims in the output tensor are incorrectly/unexpectedly transposed. // This could be fixed by adding `outer_dims_perm = [1, 0]` (the default value assumes no transpose). func.func @pack_invalid_result_shape(%input: tensor<256x128xf32>, %output: tensor<4x16x32x16xf32>) -> tensor<4x16x32x16xf32> { - // expected-error@+1 {{the shape of output is not large enough to hold the packed data. Expected at least 'tensor<16x4x32x16xf32>', got 'tensor<4x16x32x16xf32>'}} + // expected-error@+1 {{expected 'tensor<16x4x32x16xf32>' for the packed domain value, got 'tensor<4x16x32x16xf32>'}} %0 = linalg.pack %input inner_dims_pos = [1, 0] inner_tiles = [32, 16] into %output : tensor<256x128xf32> -> tensor<4x16x32x16xf32> return %0 : tensor<4x16x32x16xf32> } // ----- -func.func @pack_invalid(%input: tensor<256x128xf32>, %output: tensor<8x8x32x16xf32>) -> tensor<8x8x32x16xf32> { - // expected-error@+1 {{the shape of output is not large enough to hold the packed data. 
Expected at least 'tensor<8x8x16x32xf32>', got 'tensor<8x8x32x16xf32>'}} - %0 = linalg.pack %input inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %output : tensor<256x128xf32> -> tensor<8x8x32x16xf32> - return %0 : tensor<8x8x32x16xf32> +func.func @pack_invalid_result_shape(%input: tensor<256x128xf32>, %output: tensor<8x7x16x32xf32>) -> tensor<8x7x16x32xf32> { + // expected-error@+1 {{expected 'tensor<8x8x16x32xf32>' for the packed domain value, got 'tensor<8x7x16x32xf32>'}} + %0 = linalg.pack %input inner_dims_pos = [1, 0] inner_tiles = [16, 32] into %output : tensor<256x128xf32> -> tensor<8x7x16x32xf32> + return %0 : tensor<8x7x16x32xf32> +} + +// ----- + +func.func @unpack_with_artifical_tiles_that_are_dropped(%input: tensor<3x8xf32>, %output: tensor<9xf32>) -> tensor<9xf32> { + // expected-error@+1 {{expected 'tensor<2x8xf32>' for the packed domain value, got 'tensor<3x8xf32>'}} + %0 = linalg.unpack %input inner_dims_pos = [0] inner_tiles = [8] into %output + : tensor<3x8xf32> -> tensor<9xf32> + return %0 : tensor<9xf32> } // ----- -func.func @unpack_invalid(%output: tensor<256x128xf32>, %input: tensor<8x8x32x16xf32>) -> tensor<256x128xf32> { - // expected-error@+1 {{the shape of output is not large enough to hold the packed data. 
Expected at least 'tensor<8x32x4x32xf32>', got 'tensor<8x8x32x16xf32>'}} - %0 = linalg.unpack %input inner_dims_pos = [1, 0] inner_tiles = [4, 32] into %output : tensor<8x8x32x16xf32> -> tensor<256x128xf32> +func.func @unpack_invalid_source_shape(%output: tensor<256x128xf32>, %input: tensor<8x8x4x32xf32>) -> tensor<256x128xf32> { + // expected-error@+1 {{expected 'tensor<8x32x4x32xf32>' for the packed domain value, got 'tensor<8x8x4x32xf32>'}} + %0 = linalg.unpack %input inner_dims_pos = [1, 0] inner_tiles = [4, 32] into %output : tensor<8x8x4x32xf32> -> tensor<256x128xf32> return %0 : tensor<256x128xf32> } diff --git a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir index 81fd7a8..9e7681d 100644 --- a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir +++ b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir @@ -326,23 +326,23 @@ module attributes {transform.with_named_sequence} { // ----- // CHECK-LABEL: func.func @pack_with_pad( -func.func @pack_with_pad(%src: tensor<4225x12xf32>, %dest: tensor<265x16x16x1xf32>) - -> tensor<265x16x16x1xf32> { +func.func @pack_with_pad(%src: tensor<4225x12xf32>, %dest: tensor<265x12x16x1xf32>) + -> tensor<265x12x16x1xf32> { // CHECK: tensor.pad {{.*}} low[0, 0] - // CHECK: : tensor<4225x12xf32> to tensor<4240x16xf32> + // CHECK: : tensor<4225x12xf32> to tensor<4240x12xf32> // CHECK: tensor.expand_shape %{{.*}} {{\[}}[0, 1], [2, 3]] - // CHECK-SAME: : tensor<4240x16xf32> into tensor<265x16x16x1xf32> + // CHECK-SAME: : tensor<4240x12xf32> into tensor<265x16x12x1xf32> // CHECK: linalg.transpose - // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}} : tensor<265x16x16x1xf32>) - // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor<265x16x16x1xf32>) + // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}} : tensor<265x16x12x1xf32>) + // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor<265x12x16x1xf32>) // CHECK-SAME: permutation = [0, 2, 1, 3] %cst = arith.constant 0.000000e+00 : f32 %0 = linalg.pack %src padding_value(%cst : 
f32) inner_dims_pos = [0, 1] inner_tiles = [16, 1] into %dest - : tensor<4225x12xf32> -> tensor<265x16x16x1xf32> - return %0 : tensor<265x16x16x1xf32> + : tensor<4225x12xf32> -> tensor<265x12x16x1xf32> + return %0 : tensor<265x12x16x1xf32> } module attributes {transform.with_named_sequence} { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 4c50ed3..8c846cd 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -1406,7 +1406,7 @@ func.func @omp_atomic_update(%x : memref<i32>, %expr : i32, %xBool : memref<i1>, // CHECK-NEXT: (%[[XVAL:.*]]: i1): // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.icmp "eq" %[[XVAL]], %[[EXPRBOOL]] : i1 // CHECK-NEXT: omp.yield(%[[NEWVAL]] : i1) - // } + // CHECK-NEXT: } omp.atomic.update %xBool : memref<i1> { ^bb0(%xval: i1): %newval = llvm.icmp "eq" %xval, %exprBool : i1 @@ -1562,6 +1562,14 @@ func.func @omp_atomic_update(%x : memref<i32>, %expr : i32, %xBool : memref<i1>, omp.yield(%newval : i32) } + // CHECK: omp.atomic.update %[[X]] : memref<i32> { + // CHECK-NEXT: (%[[XVAL:.*]]: i32): + // CHECK-NEXT: omp.yield(%{{.+}} : i32) + // CHECK-NEXT: } {atomic_control = #omp.atomic_control<ignore_denormal_mode = true, fine_grained_memory = true, remote_memory = true>} + omp.atomic.update %x : memref<i32> { + ^bb0(%xval:i32): + omp.yield(%const:i32) + } {atomic_control = #omp.atomic_control<ignore_denormal_mode = true, fine_grained_memory = true, remote_memory = true>} return } diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 1461c30..9cfebd5 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -2562,118 +2562,6 @@ func.func @insert_2d_splat_constant() // ----- -// CHECK-LABEL: func @insert_element_fold -// CHECK: %[[V:.+]] = arith.constant dense<[0, 1, 7, 3]> : vector<4xi32> -// CHECK: return %[[V]] -func.func @insert_element_fold() -> vector<4xi32> { - %v = 
arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> - %s = arith.constant 7 : i32 - %i = arith.constant 2 : i32 - %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> - return %1 : vector<4xi32> -} - -// ----- - -// CHECK-LABEL: func @insert_element_invalid_fold -func.func @insert_element_invalid_fold() -> vector<1xf32> { - // Out-of-bound index here. - %c26 = arith.constant 26 : index - %cst_2 = arith.constant 1.60215309E+9 : f32 - %cst_20 = arith.constant dense<1.60215309E+9> : vector<1xf32> -// CHECK: vector.insertelement - %46 = vector.insertelement %cst_2, %cst_20[%c26 : index] : vector<1xf32> - return %46 : vector<1xf32> -} - - -// ----- - -// Do not crash on poison -// CHECK-LABEL: func @insert_poison_fold1 -// CHECK: vector.insertelement -func.func @insert_poison_fold1() -> vector<4xi32> { - %v = ub.poison : vector<4xi32> - %s = arith.constant 7 : i32 - %i = arith.constant 2 : i32 - %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> - return %1 : vector<4xi32> -} - -// ----- - -// Do not crash on poison -// CHECK-LABEL: func @insert_poison_fold2 -// CHECK: vector.insertelement -func.func @insert_poison_fold2() -> vector<4xi32> { - %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> - %s = ub.poison : i32 - %i = arith.constant 2 : i32 - %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> - return %1 : vector<4xi32> -} - -// ----- - -// Do not crash on poison -// CHECK-LABEL: func @insert_poison_fold3 -// CHECK: vector.insertelement -func.func @insert_poison_fold3() -> vector<4xi32> { - %v = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> - %s = arith.constant 7 : i32 - %i = ub.poison : i32 - %1 = vector.insertelement %s, %v[%i : i32] : vector<4xi32> - return %1 : vector<4xi32> -} - -// ----- - -// CHECK-LABEL: func @extract_element_fold -// CHECK: %[[C:.+]] = arith.constant 5 : i32 -// CHECK: return %[[C]] -func.func @extract_element_fold() -> i32 { - %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32> - %i = arith.constant 2 : 
i32 - %1 = vector.extractelement %v[%i : i32] : vector<4xi32> - return %1 : i32 -} - -// CHECK-LABEL: func @extract_element_splat_fold -// CHECK-SAME: (%[[ARG:.+]]: i32) -// CHECK: return %[[ARG]] -func.func @extract_element_splat_fold(%a : i32) -> i32 { - %v = vector.splat %a : vector<4xi32> - %i = arith.constant 2 : i32 - %1 = vector.extractelement %v[%i : i32] : vector<4xi32> - return %1 : i32 -} - -// ----- - -// Do not crash on poison -// CHECK-LABEL: func @extract_element_poison_fold1 -// CHECK: vector.extractelement -func.func @extract_element_poison_fold1() -> i32 { - %v = ub.poison : vector<4xi32> - %i = arith.constant 2 : i32 - %1 = vector.extractelement %v[%i : i32] : vector<4xi32> - return %1 : i32 -} - -// ----- - -// Do not crash on poison -// CHECK-LABEL: func @extract_element_poison_fold2 -// CHECK: vector.extractelement -func.func @extract_element_poison_fold2() -> i32 { - %v = arith.constant dense<[1, 3, 5, 7]> : vector<4xi32> - %i = ub.poison : i32 - %1 = vector.extractelement %v[%i : i32] : vector<4xi32> - return %1 : i32 -} - -// ----- - // CHECK-LABEL: func @reduce_one_element_vector_extract // CHECK-SAME: (%[[V:.+]]: vector<1xf32>) // CHECK: %[[S:.+]] = vector.extract %[[V]][0] : f32 from vector<1xf32> @@ -2933,18 +2821,6 @@ func.func @extract_strided_slice_of_constant_mask() -> vector<5x7xi1>{ // ----- -// CHECK-LABEL: func.func @fold_extractelement_of_broadcast( -// CHECK-SAME: %[[f:.*]]: f32 -// CHECK: return %[[f]] -func.func @fold_extractelement_of_broadcast(%f: f32) -> f32 { - %0 = vector.broadcast %f : f32 to vector<15xf32> - %c5 = arith.constant 5 : index - %1 = vector.extractelement %0 [%c5 : index] : vector<15xf32> - return %1 : f32 -} - -// ----- - // CHECK-LABEL: func.func @fold_0d_vector_reduction func.func @fold_0d_vector_reduction(%arg0: vector<f32>) -> f32 { // CHECK-NEXT: %[[RES:.*]] = vector.extract %arg{{.*}}[] : f32 from vector<f32> diff --git a/mlir/test/Dialect/Vector/int-range-interface.mlir 
b/mlir/test/Dialect/Vector/int-range-interface.mlir index 0263193..2563b48 100644 --- a/mlir/test/Dialect/Vector/int-range-interface.mlir +++ b/mlir/test/Dialect/Vector/int-range-interface.mlir @@ -60,16 +60,6 @@ func.func @vector_extract() -> index { func.return %2 : index } -// CHECK-LABEL: func @vector_extractelement -// CHECK: test.reflect_bounds {smax = 7 : index, smin = 6 : index, umax = 7 : index, umin = 6 : index} -func.func @vector_extractelement() -> index { - %c0 = arith.constant 0 : index - %0 = test.with_bounds { umin = 6 : index, umax = 7 : index, smin = 6 : index, smax = 7 : index } : vector<4xindex> - %1 = vector.extractelement %0[%c0 : index] : vector<4xindex> - %2 = test.reflect_bounds %1 : index - func.return %2 : index -} - // CHECK-LABEL: func @vector_add // CHECK: test.reflect_bounds {smax = 12 : index, smin = 10 : index, umax = 12 : index, umin = 10 : index} func.func @vector_add() -> vector<4xindex> { @@ -90,17 +80,6 @@ func.func @vector_insert() -> vector<4xindex> { func.return %3 : vector<4xindex> } -// CHECK-LABEL: func @vector_insertelement -// CHECK: test.reflect_bounds {smax = 8 : index, smin = 5 : index, umax = 8 : index, umin = 5 : index} -func.func @vector_insertelement() -> vector<4xindex> { - %c0 = arith.constant 0 : index - %0 = test.with_bounds { umin = 5 : index, umax = 7 : index, smin = 5 : index, smax = 7 : index } : vector<4xindex> - %1 = test.with_bounds { umin = 6 : index, umax = 8 : index, smin = 6 : index, smax = 8 : index } : index - %2 = vector.insertelement %1, %0[%c0 : index] : vector<4xindex> - %3 = test.reflect_bounds %2 : vector<4xindex> - func.return %3 : vector<4xindex> -} - // CHECK-LABEL: func @test_loaded_vector_extract // No bounds // CHECK: test.reflect_bounds {smax = 2147483647 : si32, smin = -2147483648 : si32, umax = 4294967295 : ui32, umin = 0 : ui32} %{{.*}} : i32 diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index ca837d3..c21de56 100644 --- 
a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -119,30 +119,6 @@ func.func @shuffle_empty_mask(%arg0: vector<2xf32>, %arg1: vector<2xf32>) { // ----- -func.func @extract_element(%arg0: vector<f32>) { - %c = arith.constant 3 : i32 - // expected-error@+1 {{expected position to be empty with 0-D vector}} - %1 = vector.extractelement %arg0[%c : i32] : vector<f32> -} - -// ----- - -func.func @extract_element(%arg0: vector<4xf32>) { - %c = arith.constant 3 : i32 - // expected-error@+1 {{expected position for 1-D vector}} - %1 = vector.extractelement %arg0[] : vector<4xf32> -} - -// ----- - -func.func @extract_element(%arg0: vector<4x4xf32>) { - %c = arith.constant 3 : i32 - // expected-error@+1 {{unexpected >1 vector rank}} - %1 = vector.extractelement %arg0[%c : i32] : vector<4x4xf32> -} - -// ----- - func.func @extract_vector_type(%arg0: index) { // expected-error@+1 {{invalid kind of type specified: expected builtin.vector, but found 'index'}} %1 = vector.extract %arg0[] : index from index @@ -192,38 +168,6 @@ func.func @extract_position_overflow(%arg0: vector<4x8x16xf32>) { // ----- -func.func @insert_element(%arg0: f32, %arg1: vector<f32>) { - %c = arith.constant 3 : i32 - // expected-error@+1 {{expected position to be empty with 0-D vector}} - %0 = vector.insertelement %arg0, %arg1[%c : i32] : vector<f32> -} - -// ----- - -func.func @insert_element(%arg0: f32, %arg1: vector<4xf32>) { - %c = arith.constant 3 : i32 - // expected-error@+1 {{expected position for 1-D vector}} - %0 = vector.insertelement %arg0, %arg1[] : vector<4xf32> -} - -// ----- - -func.func @insert_element(%arg0: f32, %arg1: vector<4x4xf32>) { - %c = arith.constant 3 : i32 - // expected-error@+1 {{unexpected >1 vector rank}} - %0 = vector.insertelement %arg0, %arg1[%c : i32] : vector<4x4xf32> -} - -// ----- - -func.func @insert_element_wrong_type(%arg0: i32, %arg1: vector<4xf32>) { - %c = arith.constant 3 : i32 - // expected-error@+1 {{'vector.insertelement' 
op failed to verify that source operand type matches element type of result}} - %0 = "vector.insertelement" (%arg0, %arg1, %c) : (i32, vector<4xf32>, i32) -> (vector<4xf32>) -} - -// ----- - func.func @insert_vector_type(%a: f32, %b: vector<4x8x16xf32>) { // expected-error@+1 {{expected position attribute of rank no greater than dest vector rank}} %1 = vector.insert %a, %b[3, 3, 3, 3, 3, 3] : f32 into vector<4x8x16xf32> diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir index 6a56116..625ffc1 100644 --- a/mlir/test/Dialect/Vector/ops.mlir +++ b/mlir/test/Dialect/Vector/ops.mlir @@ -199,22 +199,6 @@ func.func @shuffle_poison_mask(%a: vector<4xf32>, %b: vector<4xf32>) -> vector<4 return %1 : vector<4xf32> } -// CHECK-LABEL: @extract_element_0d -func.func @extract_element_0d(%a: vector<f32>) -> f32 { - // CHECK-NEXT: vector.extractelement %{{.*}}[] : vector<f32> - %1 = vector.extractelement %a[] : vector<f32> - return %1 : f32 -} - -// CHECK-LABEL: @extract_element -func.func @extract_element(%a: vector<16xf32>) -> f32 { - // CHECK: %[[C15:.*]] = arith.constant 15 : i32 - %c = arith.constant 15 : i32 - // CHECK-NEXT: vector.extractelement %{{.*}}[%[[C15]] : i32] : vector<16xf32> - %1 = vector.extractelement %a[%c : i32] : vector<16xf32> - return %1 : f32 -} - // CHECK-LABEL: @extract_const_idx func.func @extract_const_idx(%arg0: vector<4x8x16xf32>) -> (vector<4x8x16xf32>, vector<8x16xf32>, vector<16xf32>, f32) { @@ -256,22 +240,6 @@ func.func @extract_poison_idx(%a: vector<4x5xf32>) -> f32 { return %0 : f32 } -// CHECK-LABEL: @insert_element_0d -func.func @insert_element_0d(%a: f32, %b: vector<f32>) -> vector<f32> { - // CHECK-NEXT: vector.insertelement %{{.*}}, %{{.*}}[] : vector<f32> - %1 = vector.insertelement %a, %b[] : vector<f32> - return %1 : vector<f32> -} - -// CHECK-LABEL: @insert_element -func.func @insert_element(%a: f32, %b: vector<16xf32>) -> vector<16xf32> { - // CHECK: %[[C15:.*]] = arith.constant 15 : i32 - %c = 
arith.constant 15 : i32 - // CHECK-NEXT: vector.insertelement %{{.*}}, %{{.*}}[%[[C15]] : i32] : vector<16xf32> - %1 = vector.insertelement %a, %b[%c : i32] : vector<16xf32> - return %1 : vector<16xf32> -} - // CHECK-LABEL: @insert_const_idx func.func @insert_const_idx(%a: f32, %b: vector<16xf32>, %c: vector<8x16xf32>, %res: vector<4x8x16xf32>) -> vector<4x8x16xf32> { diff --git a/mlir/test/IR/test-pattern-logging-listener.mlir b/mlir/test/IR/test-pattern-logging-listener.mlir index c521110..d3d42e3 100644 --- a/mlir/test/IR/test-pattern-logging-listener.mlir +++ b/mlir/test/IR/test-pattern-logging-listener.mlir @@ -8,15 +8,15 @@ // {anonymous_namespace} vs `anonymous_namespace` (and maybe others?) on the // various platforms. -// CHECK: [pattern-logging-listener] +// CHECK: [pattern-logging-listener:1] // CHECK-SAME: ::ReplaceWithNewOp | notifyOperationInserted | test.new_op -// CHECK: [pattern-logging-listener] +// CHECK: [pattern-logging-listener:1] // CHECK-SAME: ::ReplaceWithNewOp | notifyOperationReplaced (with values) | test.replace_with_new_op -// CHECK: [pattern-logging-listener] +// CHECK: [pattern-logging-listener:1] // CHECK-SAME: ::ReplaceWithNewOp | notifyOperationModified | arith.addi -// CHECK: [pattern-logging-listener] +// CHECK: [pattern-logging-listener:1] // CHECK-SAME: ::ReplaceWithNewOp | notifyOperationModified | arith.addi -// CHECK: [pattern-logging-listener] +// CHECK: [pattern-logging-listener:1] // CHECK-SAME: ::ReplaceWithNewOp | notifyOperationErased | test.replace_with_new_op func.func @replace_with_new_op() -> i32 { %a = "test.replace_with_new_op"() : () -> (i32) diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir index 05e6782..a7bb039 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-unpack-mmt4d.mlir @@ -81,21 +81,21 @@ func.func private @matmul(%A: 
tensor<7x16xi32>, %B: tensor<16x13xi32>, %C: tenso func.func private @mmt4d(%A: tensor<7x16xi32>, %B: tensor<16x13xi32>, %C: tensor<7x13xi32>) -> tensor<7x13xi32> { %zero = arith.constant 0 : i32 - %A_pack_empty = tensor.empty() : tensor<2x16x8x1xi32> + %A_pack_empty = tensor.empty() : tensor<1x16x8x1xi32> %B_pack_empty = tensor.empty() : tensor<2x16x8x1xi32> - %C_pack_empty = tensor.empty() : tensor<2x2x8x8xi32> + %C_pack_empty = tensor.empty() : tensor<1x2x8x8xi32> // Pack matrices - %A_pack = linalg.pack %A padding_value(%zero : i32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %A_pack_empty : tensor<7x16xi32> -> tensor<2x16x8x1xi32> + %A_pack = linalg.pack %A padding_value(%zero : i32) inner_dims_pos = [0, 1] inner_tiles = [8, 1] into %A_pack_empty : tensor<7x16xi32> -> tensor<1x16x8x1xi32> %B_pack = linalg.pack %B padding_value(%zero : i32) outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [8, 1] into %B_pack_empty : tensor<16x13xi32> -> tensor<2x16x8x1xi32> - %C_pack = linalg.pack %C padding_value(%zero : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_pack_empty : tensor<7x13xi32> -> tensor<2x2x8x8xi32> + %C_pack = linalg.pack %C padding_value(%zero : i32) outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_pack_empty : tensor<7x13xi32> -> tensor<1x2x8x8xi32> // MMT4D - %mmt4d = linalg.mmt4d ins(%A_pack, %B_pack : tensor<2x16x8x1xi32>, tensor<2x16x8x1xi32>) outs(%C_pack : tensor<2x2x8x8xi32>) -> tensor<2x2x8x8xi32> + %mmt4d = linalg.mmt4d ins(%A_pack, %B_pack : tensor<1x16x8x1xi32>, tensor<2x16x8x1xi32>) outs(%C_pack : tensor<1x2x8x8xi32>) -> tensor<1x2x8x8xi32> // Unpack output %C_out_empty = tensor.empty() : tensor<7x13xi32> - %C_out_unpack = linalg.unpack %mmt4d outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %C_out_empty : tensor<2x2x8x8xi32> -> tensor<7x13xi32> + %C_out_unpack = linalg.unpack %mmt4d outer_dims_perm = [0, 1] inner_dims_pos = [0, 1] 
inner_tiles = [8, 8] into %C_out_empty : tensor<1x2x8x8xi32> -> tensor<7x13xi32> return %C_out_unpack : tensor<7x13xi32> } diff --git a/mlir/test/Integration/Dialect/Vector/CPU/0-d-vectors.mlir b/mlir/test/Integration/Dialect/Vector/CPU/0-d-vectors.mlir index 6e2a82b..6ec1031 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/0-d-vectors.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/0-d-vectors.mlir @@ -4,14 +4,14 @@ // RUN: FileCheck %s func.func @extract_element_0d(%a: vector<f32>) { - %1 = vector.extractelement %a[] : vector<f32> + %1 = vector.extract %a[] : f32 from vector<f32> // CHECK: 42 vector.print %1: f32 return } func.func @insert_element_0d(%a: f32, %b: vector<f32>) -> (vector<f32>) { - %1 = vector.insertelement %a, %b[] : vector<f32> + %1 = vector.insert %a, %b[] : f32 into vector<f32> return %1: vector<f32> } @@ -58,9 +58,9 @@ func.func @broadcast_0d(%a: f32) { func.func @bitcast_0d() { %0 = arith.constant 42 : i32 %1 = arith.constant dense<0> : vector<i32> - %2 = vector.insertelement %0, %1[] : vector<i32> + %2 = vector.insert %0, %1[] : i32 into vector<i32> %3 = vector.bitcast %2 : vector<i32> to vector<f32> - %4 = vector.extractelement %3[] : vector<f32> + %4 = vector.extract %3[] : f32 from vector<f32> %5 = arith.bitcast %4 : f32 to i32 // CHECK: 42 vector.print %5: i32 diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir index b69a200..eb99886 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir @@ -72,7 +72,7 @@ func.func @za0_d_f64() -> i32 { %row = vector.load %mem2[%vnum, %c0] : memref<?x?xf64>, vector<[2]xf64> %inner_add_reduce = scf.for %offset = %c0 to %svl_d step %c1_index iter_args(%inner_iter = %init_0_f64) -> (f64) { - %t = vector.extractelement %row[%offset : index] : vector<[2]xf64> + %t = vector.extract 
%row[%offset] : f64 from vector<[2]xf64> %inner_add_reduce_next = arith.addf %inner_iter, %t : f64 scf.yield %inner_add_reduce_next : f64 } @@ -102,7 +102,7 @@ func.func @za0_d_f64() -> i32 { %cmp = arith.cmpf one, %row_1, %row_2 : vector<[2]xf64> %inner_mul_reduce = scf.for %i = %c0 to %svl_d step %c1_index iter_args(%inner_iter = %init_1) -> (i64) { - %t = vector.extractelement %cmp[%i : index] : vector<[2]xi1> + %t = vector.extract %cmp[%i] : i1 from vector<[2]xi1> %t_i64 = arith.extui %t : i1 to i64 %inner_mul_reduce_next = arith.muli %inner_iter, %t_i64 : i64 scf.yield %inner_mul_reduce_next : i64 @@ -125,7 +125,7 @@ func.func @za0_d_f64() -> i32 { %cmp = arith.cmpf oeq, %row_1, %row_2 : vector<[2]xf64> %inner_mul_reduce = scf.for %i = %c0 to %svl_d step %c1_index iter_args(%inner_iter = %init_1) -> (i64) { - %t = vector.extractelement %cmp[%i : index] : vector<[2]xi1> + %t = vector.extract %cmp[%i] : i1 from vector<[2]xi1> %t_i64 = arith.extui %t : i1 to i64 %inner_mul_reduce_next = arith.muli %inner_iter, %t_i64 : i64 scf.yield %inner_mul_reduce_next : i64 diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir index 697fb90..ad8e321 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir @@ -36,7 +36,7 @@ func.func @entry() -> i32 { %row = vector.load %za_b[%vnum, %c0] : memref<?x?xi8>, vector<[16]xi8> %inner_mul_reduce = scf.for %offset = %c0 to %svl_b step %c1_index iter_args(%inner_iter = %init_1) -> (i64) { - %t = vector.extractelement %row[%offset : index] : vector<[16]xi8> + %t = vector.extract %row[%offset] : i8 from vector<[16]xi8> %t_i64 = arith.extui %t : i8 to i64 %inner_mul_reduce_next = arith.muli %inner_iter, %t_i64 : i64 scf.yield %inner_mul_reduce_next : i64 @@ -64,7 +64,7 @@ func.func @entry() -> i32 { %row = vector.load %za_b[%vnum, %c0] : memref<?x?xi8>, 
vector<[16]xi8> %inner_mul_reduce = scf.for %offset = %c0 to %svl_b step %c1_index iter_args(%inner_iter = %init_1) -> (i64) { - %t = vector.extractelement %row[%offset : index] : vector<[16]xi8> + %t = vector.extract %row[%offset] : i8 from vector<[16]xi8> %t_i64 = arith.extui %t : i8 to i64 %inner_mul_reduce_next = arith.muli %inner_iter, %t_i64 : i64 scf.yield %inner_mul_reduce_next : i64 diff --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/dot.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/dot.mlir index 53a7282..aff272c2 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/dot.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/dot.mlir @@ -11,8 +11,8 @@ func.func @entry() -> i32 { %b = arith.constant dense<[9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0]> : vector<8xf32> %r = x86vector.avx.intr.dot %a, %b : vector<8xf32> - %1 = vector.extractelement %r[%i0 : i32]: vector<8xf32> - %2 = vector.extractelement %r[%i4 : i32]: vector<8xf32> + %1 = vector.extract %r[%i0] : f32 from vector<8xf32> + %2 = vector.extract %r[%i4] : f32 from vector<8xf32> %d = arith.addf %1, %2 : f32 // CHECK: ( 110, 110, 110, 110, 382, 382, 382, 382 ) diff --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/sparse-dot-product.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/sparse-dot-product.mlir index bf1caaa..1c56990 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/sparse-dot-product.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/sparse-dot-product.mlir @@ -196,13 +196,13 @@ func.func @memref_dot_optimized(%m_A : memref<?xi64>, %m_B : memref<?xf64>, iter_args(%sum0 = %data_zero, %b_start0 = %c0) -> (f64, index) { %v_A = vector.transfer_read %m_A[%a], %index_padding : memref<?xi64>, vector<8xi64> - %segA_min = vector.extractelement %v_A[%i0 : i32] : vector<8xi64> + %segA_min = vector.extract %v_A[%i0] : i64 from vector<8xi64> %r1, %next_b_start0 = scf.for %b = %b_start0 to %N step %c8 
iter_args(%sum1 = %sum0, %b_start1 = %b_start0) -> (f64, index) { %v_C = vector.transfer_read %m_C[%b], %index_padding : memref<?xi64>, vector<8xi64> - %segB_max = vector.extractelement %v_C[%i7 : i32] : vector<8xi64> + %segB_max = vector.extract %v_C[%i7] : i64 from vector<8xi64> %seg1_done = arith.cmpi "slt", %segB_max, %segA_min : i64 %r2, %next_b_start1 = scf.if %seg1_done -> (f64, index) { @@ -273,10 +273,10 @@ func.func @memref_dot_while(%m_A : memref<?xi64>, %m_B : memref<?xf64>, %v_C = vector.transfer_read %m_C[%b1], %index_padding : memref<?xi64>, vector<8xi64> - %segA_min = vector.extractelement %v_A[%i0 : i32] : vector<8xi64> - %segA_max = vector.extractelement %v_A[%i7 : i32] : vector<8xi64> - %segB_min = vector.extractelement %v_C[%i0 : i32] : vector<8xi64> - %segB_max = vector.extractelement %v_C[%i7 : i32] : vector<8xi64> + %segA_min = vector.extract %v_A[%i0] : i64 from vector<8xi64> + %segA_max = vector.extract %v_A[%i7] : i64 from vector<8xi64> + %segB_min = vector.extract %v_C[%i0] : i64 from vector<8xi64> + %segB_max = vector.extract %v_C[%i7] : i64 from vector<8xi64> %seg1_done = arith.cmpi "slt", %segB_max, %segA_min : i64 %r2, %a2, %b2 = scf.if %seg1_done -> (f64, index, index) { @@ -370,8 +370,8 @@ func.func @memref_dot_while_branchless(%m_A : memref<?xi64>, %m_B : memref<?xf64 -> f64 %r2 = arith.addf %r1, %subresult : f64 - %segA_max = vector.extractelement %v_A[%i7 : i32] : vector<8xi64> - %segB_max = vector.extractelement %v_C[%i7 : i32] : vector<8xi64> + %segA_max = vector.extract %v_A[%i7] : i64 from vector<8xi64> + %segB_max = vector.extract %v_C[%i7] : i64 from vector<8xi64> %cond_a = arith.cmpi "sle", %segA_max, %segB_max : i64 %cond_a_i64 = arith.extui %cond_a : i1 to i64 diff --git a/mlir/test/Integration/Dialect/Vector/CPU/compress.mlir b/mlir/test/Integration/Dialect/Vector/CPU/compress.mlir index e9a66cc..1683fa5 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/compress.mlir +++ 
b/mlir/test/Integration/Dialect/Vector/CPU/compress.mlir @@ -28,8 +28,7 @@ func.func @printmem16(%A: memref<?xf32>) { %mem = scf.for %i = %c0 to %c16 step %c1 iter_args(%m_iter = %m) -> (vector<16xf32>) { %c = memref.load %A[%i] : memref<?xf32> - %i32 = arith.index_cast %i : index to i32 - %m_new = vector.insertelement %c, %m_iter[%i32 : i32] : vector<16xf32> + %m_new = vector.insert %c, %m_iter[%i] : f32 into vector<16xf32> scf.yield %m_new : vector<16xf32> } vector.print %mem : vector<16xf32> @@ -49,7 +48,7 @@ func.func @entry() { memref.store %z, %A[%i] : memref<?xf32> %i32 = arith.index_cast %i : index to i32 %fi = arith.sitofp %i32 : i32 to f32 - %v_new = vector.insertelement %fi, %v_iter[%i32 : i32] : vector<16xf32> + %v_new = vector.insert %fi, %v_iter[%i] : f32 into vector<16xf32> scf.yield %v_new : vector<16xf32> } diff --git a/mlir/test/Integration/Dialect/Vector/CPU/maskedstore.mlir b/mlir/test/Integration/Dialect/Vector/CPU/maskedstore.mlir index 2dc00df..826da53 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/maskedstore.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/maskedstore.mlir @@ -28,8 +28,7 @@ func.func @printmem16(%A: memref<?xf32>) { %mem = scf.for %i = %c0 to %c16 step %c1 iter_args(%m_iter = %m) -> (vector<16xf32>) { %c = memref.load %A[%i] : memref<?xf32> - %i32 = arith.index_cast %i : index to i32 - %m_new = vector.insertelement %c, %m_iter[%i32 : i32] : vector<16xf32> + %m_new = vector.insert %c, %m_iter[%i] : f32 into vector<16xf32> scf.yield %m_new : vector<16xf32> } vector.print %mem : vector<16xf32> @@ -53,7 +52,7 @@ func.func @entry() { iter_args(%v_iter = %v) -> (vector<16xf32>) { %i32 = arith.index_cast %i : index to i32 %fi = arith.sitofp %i32 : i32 to f32 - %v_new = vector.insertelement %fi, %v_iter[%i32 : i32] : vector<16xf32> + %v_new = vector.insert %fi, %v_iter[%i] : f32 into vector<16xf32> scf.yield %v_new : vector<16xf32> } diff --git a/mlir/test/Integration/Dialect/Vector/CPU/scatter.mlir 
b/mlir/test/Integration/Dialect/Vector/CPU/scatter.mlir index 54b6e69..22b5eef 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/scatter.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/scatter.mlir @@ -21,8 +21,7 @@ func.func @printmem8(%A: memref<?xf32>) { %mem = scf.for %i = %c0 to %c8 step %c1 iter_args(%m_iter = %m) -> (vector<8xf32>) { %c = memref.load %A[%i] : memref<?xf32> - %i32 = arith.index_cast %i : index to i32 - %m_new = vector.insertelement %c, %m_iter[%i32 : i32] : vector<8xf32> + %m_new = vector.insert %c, %m_iter[%i] : f32 into vector<8xf32> scf.yield %m_new : vector<8xf32> } vector.print %mem : vector<8xf32> diff --git a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-1d.mlir index 2393bd1..639eed4 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-1d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/transfer-read-1d.mlir @@ -200,7 +200,7 @@ func.func @entry() { // CHECK: ( 2, 12, 22, -1, -1, -42, -42, -42, -42 ) // 6. Read a scalar from a 2D memref and broadcast the value to a 1D vector. - // Generates a loop with vector.insertelement. + // Generates a loop with vector.insert. 
call @transfer_read_1d_broadcast(%A, %c1, %c2) : (memref<?x?xf32>, index, index) -> () // CHECK: ( 12, 12, 12, 12, 12, 12, 12, 12, 12 ) diff --git a/mlir/test/Integration/GPU/Vulkan/vector-interleave.mlir b/mlir/test/Integration/GPU/Vulkan/vector-interleave.mlir index e665653..731bd5a 100644 --- a/mlir/test/Integration/GPU/Vulkan/vector-interleave.mlir +++ b/mlir/test/Integration/GPU/Vulkan/vector-interleave.mlir @@ -26,17 +26,17 @@ module attributes { %val2 = memref.load %arg1[%idx0] : memref<2xi32> %val3 = memref.load %arg1[%idx1] : memref<2xi32> - %lhs0 = vector.insertelement %val0, %lhs[%idx0 : index] : vector<2xi32> - %lhs1 = vector.insertelement %val1, %lhs0[%idx1 : index] : vector<2xi32> - %rhs0 = vector.insertelement %val2, %rhs[%idx0 : index] : vector<2xi32> - %rhs1 = vector.insertelement %val3, %rhs0[%idx1 : index] : vector<2xi32> + %lhs0 = vector.insert %val0, %lhs[%idx0] : i32 into vector<2xi32> + %lhs1 = vector.insert %val1, %lhs0[%idx1] : i32 into vector<2xi32> + %rhs0 = vector.insert %val2, %rhs[%idx0] : i32 into vector<2xi32> + %rhs1 = vector.insert %val3, %rhs0[%idx1] : i32 into vector<2xi32> %interleave = vector.interleave %lhs1, %rhs1 : vector<2xi32> -> vector<4xi32> - %res0 = vector.extractelement %interleave[%idx0 : index] : vector<4xi32> - %res1 = vector.extractelement %interleave[%idx1 : index] : vector<4xi32> - %res2 = vector.extractelement %interleave[%idx2 : index] : vector<4xi32> - %res3 = vector.extractelement %interleave[%idx3 : index] : vector<4xi32> + %res0 = vector.extract %interleave[%idx0] : i32 from vector<4xi32> + %res1 = vector.extract %interleave[%idx1] : i32 from vector<4xi32> + %res2 = vector.extract %interleave[%idx2] : i32 from vector<4xi32> + %res3 = vector.extract %interleave[%idx3] : i32 from vector<4xi32> memref.store %res0, %arg2[%idx0]: memref<4xi32> memref.store %res1, %arg2[%idx1]: memref<4xi32> diff --git a/mlir/test/Integration/GPU/Vulkan/vector-shuffle.mlir b/mlir/test/Integration/GPU/Vulkan/vector-shuffle.mlir 
index dc53fe3..c1b7dba 100644 --- a/mlir/test/Integration/GPU/Vulkan/vector-shuffle.mlir +++ b/mlir/test/Integration/GPU/Vulkan/vector-shuffle.mlir @@ -26,17 +26,17 @@ module attributes { %val2 = memref.load %arg1[%idx0] : memref<2xi32> %val3 = memref.load %arg1[%idx1] : memref<2xi32> - %lhs0 = vector.insertelement %val0, %lhs[%idx0 : index] : vector<2xi32> - %lhs1 = vector.insertelement %val1, %lhs0[%idx1 : index] : vector<2xi32> - %rhs0 = vector.insertelement %val2, %rhs[%idx0 : index] : vector<2xi32> - %rhs1 = vector.insertelement %val3, %rhs0[%idx1 : index] : vector<2xi32> + %lhs0 = vector.insert %val0, %lhs[%idx0] : i32 into vector<2xi32> + %lhs1 = vector.insert %val1, %lhs0[%idx1] : i32 into vector<2xi32> + %rhs0 = vector.insert %val2, %rhs[%idx0] : i32 into vector<2xi32> + %rhs1 = vector.insert %val3, %rhs0[%idx1] : i32 into vector<2xi32> %shuffle = vector.shuffle %lhs1, %rhs1[2, 1, 3, 3] : vector<2xi32>, vector<2xi32> - %res0 = vector.extractelement %shuffle[%idx0 : index] : vector<4xi32> - %res1 = vector.extractelement %shuffle[%idx1 : index] : vector<4xi32> - %res2 = vector.extractelement %shuffle[%idx2 : index] : vector<4xi32> - %res3 = vector.extractelement %shuffle[%idx3 : index] : vector<4xi32> + %res0 = vector.extract %shuffle[%idx0] : i32 from vector<4xi32> + %res1 = vector.extract %shuffle[%idx1] : i32 from vector<4xi32> + %res2 = vector.extract %shuffle[%idx2] : i32 from vector<4xi32> + %res3 = vector.extract %shuffle[%idx3] : i32 from vector<4xi32> memref.store %res0, %arg2[%idx0]: memref<4xi32> memref.store %res1, %arg2[%idx1]: memref<4xi32> diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir index cdbca72..7888462 100644 --- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir +++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir @@ -595,16 +595,17 @@ module attributes {transform.with_named_sequence} { // ----- -// It is 
valid to fuse the pack op with padding semantics if the tiled -// dimensions do not need padding. +// It is valid to fuse the pack op with padding semantics if it is a perfect +// tiling case. func.func @fuse_pack_consumer_with_padding_semantics(%arg0: tensor<64x32xf32>, %arg1: tensor<64x32xf32>) -> tensor<22x2x3x16xf32> { - %0 = scf.forall (%arg2) = (0) to (32) step (16) shared_outs(%arg3 = %arg1) -> (tensor<64x32xf32>) { - %src = tensor.extract_slice %arg0[0, %arg2] [64, 16] [1, 1] : tensor<64x32xf32> to tensor<64x16xf32> - %dest = tensor.extract_slice %arg3[0, %arg2] [64, 16] [1, 1] : tensor<64x32xf32> to tensor<64x16xf32> - %2 = linalg.exp ins(%src : tensor<64x16xf32>) outs(%dest : tensor<64x16xf32>) -> tensor<64x16xf32> + %0 = scf.forall (%arg2, %arg3) = (0, 0) to (64, 32) step (15, 16) shared_outs(%arg4 = %arg1) -> (tensor<64x32xf32>) { + %size = affine.min affine_map<(d0) -> (-d0 + 64, 15)>(%arg2) + %src = tensor.extract_slice %arg0[%arg2, %arg3] [%size, 16] [1, 1] : tensor<64x32xf32> to tensor<?x16xf32> + %dest = tensor.extract_slice %arg4[%arg2, %arg3] [%size, 16] [1, 1] : tensor<64x32xf32> to tensor<?x16xf32> + %2 = linalg.exp ins(%src : tensor<?x16xf32>) outs(%dest : tensor<?x16xf32>) -> tensor<?x16xf32> scf.forall.in_parallel { - tensor.parallel_insert_slice %2 into %arg3[0, %arg2] [64, 16] [1, 1] : tensor<64x16xf32> into tensor<64x32xf32> + tensor.parallel_insert_slice %2 into %arg4[%arg2, %arg3] [%size, 16] [1, 1] : tensor<?x16xf32> into tensor<64x32xf32> } } %1 = tensor.empty() : tensor<22x2x3x16xf32> @@ -621,109 +622,39 @@ module attributes {transform.with_named_sequence} { transform.yield } } -// CHECK: #[[PACK_RESULT_MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)> +// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0) -> (-d0 + 64, 15)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0) -> (d0 floordiv 3)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0) -> (d0 ceildiv 3)> +// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0) -> (d0 floordiv 16)> // CHECK: func.func 
@fuse_pack_consumer_with_padding_semantics( // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK-DAG: %[[OUT_INIT:.*]] = tensor.empty() : tensor<22x2x3x16xf32> // CHECK-DAG: %[[PAD_VAL:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %{{.*}}:2 = scf.forall (%[[IV:.*]]) = (0) to (32) step (16) -// CHECK-SAME: shared_outs(%[[FIRST_OUT_ARG:.*]] = %[[ARG1]], %[[PACK_OUT_ARG:.*]] = %[[OUT_INIT]]) -// CHECK: %[[ELEM_SRC:.*]] = tensor.extract_slice %[[ARG0]][0, %[[IV]]] [64, 16] [1, 1] -// CHECK: %[[ELEM_DEST:.*]] = tensor.extract_slice %[[FIRST_OUT_ARG]][0, %[[IV]]] [64, 16] [1, 1] +// CHECK: %{{.*}}:2 = scf.forall (%[[I:.*]], %[[J:.*]]) = (0, 0) to (64, 32) step (15, 16) +// CHECK-SAME: shared_outs(%[[ELEM_OUT:.*]] = %[[ARG1]], %[[PACK_OUT:.*]] = %[[OUT_INIT]]) +// CHECK: %[[SIZE:.+]] = affine.min #[[MAP0]](%[[I]]) +// CHECK: %[[ELEM_SRC:.*]] = tensor.extract_slice %[[ARG0]] +// CHECK-SAME: [%[[I]], %[[J]]] [%[[SIZE]], 16] [1, 1] +// CHECK: %[[ELEM_DEST:.*]] = tensor.extract_slice %[[ELEM_OUT]] +// CHECK-SAME: [%[[I]], %[[J]]] [%[[SIZE]], 16] [1, 1] // CHECK: %[[ELEM:.*]] = linalg.exp // CHECK-SAME: ins(%[[ELEM_SRC]] // CHECK-SAME: outs(%[[ELEM_DEST]] -// CHECK-DAG: %[[PACK_RESULT_OFFSET:.*]] = affine.apply #[[PACK_RESULT_MAP]](%[[IV]]) -// CHECK-DAG: %[[TILED_PACK_DEST:.*]] = tensor.extract_slice %[[PACK_OUT_ARG]][0, %[[PACK_RESULT_OFFSET]], 0, 0] [22, 1, 3, 16] [1, 1, 1, 1] -// CHECK: %[[TILED_PACK_OUT:.*]] = linalg.pack %[[ELEM]] -// CHECK-SAME: padding_value(%[[PAD_VAL]] : f32) -// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [3, 16] -// CHECK-SAME: into %[[TILED_PACK_DEST]] -// CHECK: scf.forall.in_parallel { -// CHECK: tensor.parallel_insert_slice %[[GENERIC_OUT]] into %[[FIRST_OUT_ARG]][0, %[[IV]]] [64, 16] [1, 1] -// CHECK: tensor.parallel_insert_slice %[[TILED_PACK_OUT]] into %[[PACK_OUT_ARG]][0, %[[PACK_RESULT_OFFSET]], 0, 0] [22, 1, 3, 16] [1, 1, 1, 1] - -// ----- - -// It is valid to fuse the pack if the dimension is 
not tiled even when it needs -// extra padding. - -func.func @fuse_pack_consumer_with_untiled_extra_padding(%arg0: tensor<64x32xf32>, %arg1: tensor<64x32xf32>) -> tensor<33x2x3x16xf32> { - %0 = scf.forall (%arg2) = (0) to (32) step (16) shared_outs(%arg3 = %arg1) -> (tensor<64x32xf32>) { - %src = tensor.extract_slice %arg0[0, %arg2] [64, 16] [1, 1] : tensor<64x32xf32> to tensor<64x16xf32> - %dest = tensor.extract_slice %arg3[0, %arg2] [64, 16] [1, 1] : tensor<64x32xf32> to tensor<64x16xf32> - %2 = linalg.exp ins(%src : tensor<64x16xf32>) outs(%dest : tensor<64x16xf32>) -> tensor<64x16xf32> - scf.forall.in_parallel { - tensor.parallel_insert_slice %2 into %arg3[0, %arg2] [64, 16] [1, 1] : tensor<64x16xf32> into tensor<64x32xf32> - } - } - %1 = tensor.empty() : tensor<33x2x3x16xf32> - %cst = arith.constant 0.000000e+00 : f32 - %pack = linalg.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [3, 16] into %1 : tensor<64x32xf32> -> tensor<33x2x3x16xf32> - return %pack : tensor<33x2x3x16xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.parallel_insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %1 = transform.structured.match ops{["scf.forall"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %consumer, %fused_consumer = transform.test.fuse_consumer %0 in(%1) : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) - transform.yield - } -} -// CHECK: #[[PACK_RESULT_MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)> -// CHECK: func.func @fuse_pack_consumer_with_untiled_extra_padding( -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK-DAG: %[[OUT_INIT:.*]] = tensor.empty() : tensor<33x2x3x16xf32> -// CHECK-DAG: %[[PAD_VAL:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %{{.*}}:2 = scf.forall (%[[IV:.*]]) = (0) 
to (32) step (16) -// CHECK-SAME: shared_outs(%[[FIRST_OUT_ARG:.*]] = %[[ARG1]], %[[PACK_OUT_ARG:.*]] = %[[OUT_INIT]]) -// CHECK: %[[ELEM_SRC:.*]] = tensor.extract_slice %[[ARG0]][0, %[[IV]]] [64, 16] [1, 1] -// CHECK: %[[ELEM_DEST:.*]] = tensor.extract_slice %[[FIRST_OUT_ARG]][0, %[[IV]]] [64, 16] [1, 1] -// CHECK: %[[ELEM:.*]] = linalg.exp -// CHECK-SAME: ins(%[[ELEM_SRC]] -// CHECK-SAME: outs(%[[ELEM_DEST]] -// CHECK-DAG: %[[PACK_RESULT_OFFSET:.*]] = affine.apply #[[PACK_RESULT_MAP]](%[[IV]]) -// CHECK-DAG: %[[TILED_PACK_DEST:.*]] = tensor.extract_slice %[[PACK_OUT_ARG]][0, %[[PACK_RESULT_OFFSET]], 0, 0] [33, 1, 3, 16] [1, 1, 1, 1] -// CHECK: %[[TILED_PACK_OUT:.*]] = linalg.pack %[[ELEM]] +// CHECK-DAG: %[[D0_OFFSET:.*]] = affine.apply #[[MAP1]](%[[I]]) +// CHECK-DAG: %[[D0_SIZE:.*]] = affine.apply #[[MAP2]](%[[SIZE]]) +// CHECK-DAG: %[[D1_OFFSET:.*]] = affine.apply #[[MAP3]](%[[J]]) +// CHECK-DAG: %[[PACK_INIT:.*]] = tensor.extract_slice %[[PACK_OUT]] +// CHECK-SAME: [%[[D0_OFFSET]], %[[D1_OFFSET]], 0, 0] [%[[D0_SIZE]], 1, 3, 16] [1, 1, 1, 1] +// CHECK: %[[PACK:.*]] = linalg.pack %[[ELEM]] // CHECK-SAME: padding_value(%[[PAD_VAL]] : f32) // CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [3, 16] // CHECK-SAME: into %[[TILED_PACK_DEST]] // CHECK: scf.forall.in_parallel { -// CHECK: tensor.parallel_insert_slice %[[GENERIC_OUT]] into %[[FIRST_OUT_ARG]][0, %[[IV]]] [64, 16] [1, 1] -// CHECK: tensor.parallel_insert_slice %[[TILED_PACK_OUT]] into %[[PACK_OUT_ARG]][0, %[[PACK_RESULT_OFFSET]], 0, 0] [33, 1, 3, 16] [1, 1, 1, 1] - -// ----- - -// If the dimension is tiled and it needs extra padding, do not fuse the pack -// op. 
- -func.func @nofuse_pack_consumer_with_extra_padding(%arg0: tensor<64x32xf32>, %arg1: tensor<64x32xf32>) -> tensor<23x32x3x16xf32> { - %0 = scf.forall (%arg2) = (0) to (32) step (16) shared_outs(%arg3 = %arg1) -> (tensor<64x32xf32>) { - %src = tensor.extract_slice %arg0[0, %arg2] [64, 16] [1, 1] : tensor<64x32xf32> to tensor<64x16xf32> - %dest = tensor.extract_slice %arg3[0, %arg2] [64, 16] [1, 1] : tensor<64x32xf32> to tensor<64x16xf32> - %2 = linalg.exp ins(%src : tensor<64x16xf32>) outs(%dest : tensor<64x16xf32>) -> tensor<64x16xf32> - scf.forall.in_parallel { - // expected-error @below {{failed to fuse consumer of slice}} - tensor.parallel_insert_slice %2 into %arg3[0, %arg2] [64, 16] [1, 1] : tensor<64x16xf32> into tensor<64x32xf32> - } - } - %1 = tensor.empty() : tensor<23x32x3x16xf32> - %cst = arith.constant 0.000000e+00 : f32 - %pack = linalg.pack %0 padding_value(%cst : f32) inner_dims_pos = [0, 1] inner_tiles = [3, 16] into %1 : tensor<64x32xf32> -> tensor<23x32x3x16xf32> - return %pack : tensor<23x32x3x16xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.parallel_insert_slice"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %1 = transform.structured.match ops{["scf.forall"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %consumer, %fused_consumer = transform.test.fuse_consumer %0 in(%1) : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) - transform.yield - } -} +// CHECK: tensor.parallel_insert_slice %[[ELEM]] into %[[ELEM_OUT]] +// CHECK-SAME: [%[[I]], %[[J]]] [%[[SIZE]], 16] [1, 1] +// CHECK: tensor.parallel_insert_slice %[[PACK]] into %[[PACK_OUT]] +// CHECK-SAME: [%[[D0_OFFSET]], %[[D1_OFFSET]], 0, 0] [%[[D0_SIZE]], 1, 3, 16] [1, 1, 1, 1] // ----- diff --git a/mlir/test/Target/SPIRV/constant.mlir b/mlir/test/Target/SPIRV/constant.mlir index 
76d34c2..6aca11e 100644 --- a/mlir/test/Target/SPIRV/constant.mlir +++ b/mlir/test/Target/SPIRV/constant.mlir @@ -1,6 +1,7 @@ // RUN: mlir-translate --no-implicit-module --split-input-file --test-spirv-roundtrip %s | FileCheck %s +// RUN: %if spirv-tools %{ mlir-translate -no-implicit-module --split-input-file -serialize-spirv %s | spirv-val %} -spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> { +spirv.module Logical Vulkan requires #spirv.vce<v1.3, [VulkanMemoryModel, Shader, Int64, Int16, Int8, Float64, Float16, CooperativeMatrixKHR], [SPV_KHR_vulkan_memory_model, SPV_KHR_cooperative_matrix]> { // CHECK-LABEL: @bool_const spirv.func @bool_const() -> () "None" { // CHECK: spirv.Constant true @@ -305,6 +306,8 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader], []> { %coop = spirv.Constant dense<4> : !spirv.coopmatrix<16x16xi8, Subgroup, MatrixAcc> spirv.ReturnValue %coop : !spirv.coopmatrix<16x16xi8, Subgroup, MatrixAcc> } + + spirv.EntryPoint "GLCompute" @bool_const } // ----- diff --git a/mlir/test/Target/SPIRV/lit.local.cfg b/mlir/test/Target/SPIRV/lit.local.cfg new file mode 100644 index 0000000..6d44394 --- /dev/null +++ b/mlir/test/Target/SPIRV/lit.local.cfg @@ -0,0 +1,4 @@ +if config.spirv_tools_tests: + config.available_features.add("spirv-tools") + config.substitutions.append(("spirv-as", os.path.join(config.llvm_tools_dir, "spirv-as"))) + config.substitutions.append(("spirv-val", os.path.join(config.llvm_tools_dir, "spirv-val"))) diff --git a/mlir/test/lib/Dialect/Bufferization/CMakeLists.txt b/mlir/test/lib/Dialect/Bufferization/CMakeLists.txt index 226e0bb..2ee3222 100644 --- a/mlir/test/lib/Dialect/Bufferization/CMakeLists.txt +++ b/mlir/test/lib/Dialect/Bufferization/CMakeLists.txt @@ -1,5 +1,6 @@ # Exclude tests from libMLIR.so add_mlir_library(MLIRBufferizationTestPasses + TestOneShotModuleBufferize.cpp TestTensorCopyInsertion.cpp TestTensorLikeAndBufferLike.cpp diff --git 
a/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp new file mode 100644 index 0000000..1e2d4a7 --- /dev/null +++ b/mlir/test/lib/Dialect/Bufferization/TestOneShotModuleBufferize.cpp @@ -0,0 +1,57 @@ +//===- TestOneShotModuleBufferzation.cpp - Bufferization Test -----*- c++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" +#include "mlir/Dialect/Bufferization/Transforms/OneShotModuleBufferize.h" +#include "mlir/Dialect/Bufferization/Transforms/Transforms.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; + +namespace { +struct TestOneShotModuleBufferizePass + : public PassWrapper<TestOneShotModuleBufferizePass, OperationPass<>> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestOneShotModuleBufferizePass) + + TestOneShotModuleBufferizePass() = default; + TestOneShotModuleBufferizePass(const TestOneShotModuleBufferizePass &pass) + : PassWrapper(pass) {} + + void getDependentDialects(DialectRegistry &registry) const override { + registry.insert<bufferization::BufferizationDialect>(); + } + StringRef getArgument() const final { + return "test-one-shot-module-bufferize"; + } + StringRef getDescription() const final { + return "Pass to test One Shot Module Bufferization"; + } + + void runOnOperation() override { + + llvm::errs() << "Running TestOneShotModuleBufferize on: " + << getOperation()->getName() << "\n"; + bufferization::OneShotBufferizationOptions opt; + + opt.bufferizeFunctionBoundaries = true; + bufferization::BufferizationState bufferizationState; + + if 
(failed(bufferization::runOneShotModuleBufferize(getOperation(), opt, + bufferizationState))) + signalPassFailure(); + } +}; +} // namespace + +namespace mlir::test { +void registerTestOneShotModuleBufferizePass() { + PassRegistration<TestOneShotModuleBufferizePass>(); +} +} // namespace mlir::test diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index a7c6cd6..d2bb6bf 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -125,6 +125,15 @@ def SymbolScopeOp : TEST_Op<"symbol_scope", let regions = (region SizedRegion<1>:$region); } +def SymbolScopeIsolatedOp + : TEST_Op<"symbol_scope_isolated", [IsolatedFromAbove, SymbolTable, + SingleBlockImplicitTerminator< + "TerminatorOp">]> { + let summary = + "operation which defines a new symbol table that is IsolatedFromAbove"; + let regions = (region SizedRegion<1>:$region); +} + def SymbolTableRegionOp : TEST_Op<"symbol_table_region", [SymbolTable]> { let summary = "operation which defines a new symbol table without a " "restriction on a terminator"; diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py index 233fef8..feaf5fb 100644 --- a/mlir/test/lit.cfg.py +++ b/mlir/test/lit.cfg.py @@ -343,7 +343,6 @@ if config.enable_assertions: else: config.available_features.add("noasserts") - def have_host_jit_feature_support(feature_name): mlir_runner_exe = lit.util.which("mlir-runner", config.mlir_tools_dir) diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in index 132aabe..b1185e1 100644 --- a/mlir/test/lit.site.cfg.py.in +++ b/mlir/test/lit.site.cfg.py.in @@ -5,6 +5,7 @@ import sys config.target_triple = "@LLVM_TARGET_TRIPLE@" config.llvm_src_root = "@LLVM_SOURCE_DIR@" config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@") +config.spirv_tools_tests = @LLVM_INCLUDE_SPIRV_TOOLS_TESTS@ config.llvm_shlib_ext = "@SHLIBEXT@" config.llvm_shlib_dir = lit_config.substitute(path(r"@SHLIBDIR@")) config.python_executable 
= "@Python3_EXECUTABLE@" @@ -41,7 +42,7 @@ config.mlir_run_amx_tests = @MLIR_RUN_AMX_TESTS@ config.mlir_run_arm_sve_tests = @MLIR_RUN_ARM_SVE_TESTS@ # This is a workaround for the fact that LIT's: # %if <cond> -# requires <cond> to be in the set of available features. +# requires <cond> to be in the set of available features. # TODO: Update LIT's TestRunner so that this is not required. if config.mlir_run_arm_sve_tests: config.available_features.add("mlir_arm_sve_tests") |