Diffstat (limited to 'mlir/test')
-rw-r--r--  mlir/test/Conversion/ConvertToSPIRV/vector.mlir | 36
-rw-r--r--  mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir | 100
-rw-r--r--  mlir/test/Dialect/EmitC/invalid_ops.mlir | 38
-rw-r--r--  mlir/test/Dialect/LLVMIR/invalid.mlir | 6
-rw-r--r--  mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir | 90
-rw-r--r--  mlir/test/Dialect/LLVMIR/rocdl.mlir | 13
-rw-r--r--  mlir/test/Dialect/MemRef/ops.mlir | 7
-rw-r--r--  mlir/test/Dialect/OpenACC/invalid.mlir | 9
-rw-r--r--  mlir/test/Dialect/OpenACC/ops.mlir | 14
-rw-r--r--  mlir/test/Dialect/XeGPU/invalid.mlir | 31
-rw-r--r--  mlir/test/Dialect/XeGPU/subgroup-distribute.mlir | 63
-rw-r--r--  mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir | 2
-rw-r--r--  mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir | 182
-rw-r--r--  mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir | 49
-rw-r--r--  mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir | 22
-rw-r--r--  mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir | 9
-rw-r--r--  mlir/test/Target/LLVMIR/rocdl.mlir | 13
-rw-r--r--  mlir/test/Target/SPIRV/group-ops.mlir | 30
-rw-r--r--  mlir/test/Target/SPIRV/subgroup-block-intel.mlir | 34
-rw-r--r--  mlir/test/Transforms/test-legalizer-no-materializations.mlir | 67
-rw-r--r--  mlir/test/Transforms/test-legalizer.mlir | 39
-rw-r--r--  mlir/test/lib/Dialect/Test/TestPatterns.cpp | 6
-rw-r--r--  mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp | 3
-rw-r--r--  mlir/test/python/CMakeLists.txt | 2
24 files changed, 774 insertions, 91 deletions
diff --git a/mlir/test/Conversion/ConvertToSPIRV/vector.mlir b/mlir/test/Conversion/ConvertToSPIRV/vector.mlir
index a75f30d..cd8cfc8 100644
--- a/mlir/test/Conversion/ConvertToSPIRV/vector.mlir
+++ b/mlir/test/Conversion/ConvertToSPIRV/vector.mlir
@@ -275,6 +275,42 @@ func.func @reduction_minimumf(%v : vector<3xf32>, %s: f32) -> f32 {
// -----
+// CHECK-LABEL: spirv.func @reduction_minnumf(
+// CHECK-SAME: %[[V:.*]]: vector<3xf32>,
+// CHECK-SAME: %[[S:.*]]: f32) -> f32 "None" {
+// CHECK: %[[S0:.*]] = spirv.CompositeExtract %[[V]][0 : i32] : vector<3xf32>
+// CHECK: %[[S1:.*]] = spirv.CompositeExtract %[[V]][1 : i32] : vector<3xf32>
+// CHECK: %[[S2:.*]] = spirv.CompositeExtract %[[V]][2 : i32] : vector<3xf32>
+// CHECK: %[[MIN0:.*]] = spirv.GL.FMin %[[S0]], %[[S1]] : f32
+// CHECK: %[[MIN1:.*]] = spirv.GL.FMin %[[MIN0]], %[[S2]] : f32
+// CHECK: %[[MIN2:.*]] = spirv.GL.FMin %[[MIN1]], %[[S]] : f32
+// CHECK: spirv.ReturnValue %[[MIN2]] : f32
+// CHECK: }
+func.func @reduction_minnumf(%v : vector<3xf32>, %s: f32) -> f32 {
+ %reduce = vector.reduction <minnumf>, %v, %s : vector<3xf32> into f32
+ return %reduce : f32
+}
+
+// -----
+
+// CHECK-LABEL: spirv.func @reduction_maxnumf(
+// CHECK-SAME: %[[V:.*]]: vector<3xf32>,
+// CHECK-SAME: %[[S:.*]]: f32) -> f32 "None" {
+// CHECK: %[[S0:.*]] = spirv.CompositeExtract %[[V]][0 : i32] : vector<3xf32>
+// CHECK: %[[S1:.*]] = spirv.CompositeExtract %[[V]][1 : i32] : vector<3xf32>
+// CHECK: %[[S2:.*]] = spirv.CompositeExtract %[[V]][2 : i32] : vector<3xf32>
+// CHECK: %[[MAX0:.*]] = spirv.GL.FMax %[[S0]], %[[S1]] : f32
+// CHECK: %[[MAX1:.*]] = spirv.GL.FMax %[[MAX0]], %[[S2]] : f32
+// CHECK: %[[MAX2:.*]] = spirv.GL.FMax %[[MAX1]], %[[S]] : f32
+// CHECK: spirv.ReturnValue %[[MAX2]] : f32
+// CHECK: }
+func.func @reduction_maxnumf(%v : vector<3xf32>, %s: f32) -> f32 {
+ %reduce = vector.reduction <maxnumf>, %v, %s : vector<3xf32> into f32
+ return %reduce : f32
+}
+
+// -----
+
// CHECK-LABEL: func @reduction_maxsi
// CHECK-SAME: (%[[V:.+]]: vector<3xi32>, %[[S:.+]]: i32)
// CHECK: %[[S0:.+]] = spirv.CompositeExtract %[[V]][0 : i32] : vector<3xi32>
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
index b616632..b062736 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
@@ -243,6 +243,106 @@ func.func @vecdim_reduction_ori(%in: memref<256x512xi32>, %out: memref<256xi32>)
// CHECK: affine.store %[[final_red]], %{{.*}} : memref<256xi32>
// CHECK: }
+// -----
+
+func.func @vecdim_reduction_xori(%in: memref<256x512xi32>, %out: memref<256xi32>) {
+ %cst = arith.constant 0 : i32
+ affine.for %i = 0 to 256 {
+ %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) {
+ %ld = affine.load %in[%i, %j] : memref<256x512xi32>
+ %xor = arith.xori %red_iter, %ld : i32
+ affine.yield %xor : i32
+ }
+ affine.store %final_red, %out[%i] : memref<256xi32>
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @vecdim_reduction_xori(
+// CHECK-SAME: %[[input:.*]]: memref<256x512xi32>,
+// CHECK-SAME: %[[output:.*]]: memref<256xi32>) {
+// CHECK: %[[cst:.*]] = arith.constant 0 : i32
+// CHECK: affine.for %{{.*}} = 0 to 256 {
+// CHECK: %[[vzero:.*]] = arith.constant dense<0> : vector<128xi32>
+// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xi32>) {
+// CHECK: %[[poison:.*]] = ub.poison : i32
+// CHECK: %[[ld:.*]] = vector.transfer_read %[[input]]{{\[}}%{{.*}}, %{{.*}}], %[[poison]] : memref<256x512xi32>, vector<128xi32>
+// CHECK: %[[xor:.*]] = arith.xori %[[red_iter]], %[[ld]] : vector<128xi32>
+// CHECK: affine.yield %[[xor]] : vector<128xi32>
+// CHECK: }
+// CHECK: %[[final_red:.*]] = vector.reduction <xor>, %[[vred]] : vector<128xi32> into i32
+// CHECK: affine.store %[[final_red]], %[[output]]{{\[}}%{{.*}}] : memref<256xi32>
+// CHECK: }
+// CHECK: return
+// CHECK: }
+
+// -----
+
+func.func @vecdim_reduction_minnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
+ %cst = arith.constant 0xFF800000 : f32
+ affine.for %i = 0 to 256 {
+ %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
+ %ld = affine.load %in[%i, %j] : memref<256x512xf32>
+ %min = arith.minnumf %red_iter, %ld : f32
+ affine.yield %min : f32
+ }
+ affine.store %final_red, %out[%i] : memref<256xf32>
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @vecdim_reduction_minnumf(
+// CHECK-SAME: %[[input:.*]]: memref<256x512xf32>,
+// CHECK-SAME: %[[output:.*]]: memref<256xf32>) {
+// CHECK: %[[cst:.*]] = arith.constant 0xFF800000 : f32
+// CHECK: affine.for %{{.*}} = 0 to 256 {
+// CHECK: %[[vzero:.*]] = arith.constant dense<0x7FC00000> : vector<128xf32>
+// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xf32>) {
+// CHECK: %[[poison:.*]] = ub.poison : f32
+// CHECK: %[[ld:.*]] = vector.transfer_read %[[input]]{{\[}}%{{.*}}, %{{.*}}], %[[poison]] : memref<256x512xf32>, vector<128xf32>
+// CHECK: %[[min:.*]] = arith.minnumf %[[red_iter]], %[[ld]] : vector<128xf32>
+// CHECK: affine.yield %[[min]] : vector<128xf32>
+// CHECK: }
+// CHECK: %[[red_scalar:.*]] = vector.reduction <minnumf>, %[[vred]] : vector<128xf32> into f32
+// CHECK: %[[final_red:.*]] = arith.minnumf %[[red_scalar]], %[[cst]] : f32
+// CHECK: affine.store %[[final_red]], %[[output]]{{\[}}%{{.*}}] : memref<256xf32>
+// CHECK: }
+// CHECK: return
+// CHECK: }
+
+// -----
+
+func.func @vecdim_reduction_maxnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
+ %cst = arith.constant 0xFF800000 : f32
+ affine.for %i = 0 to 256 {
+ %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
+ %ld = affine.load %in[%i, %j] : memref<256x512xf32>
+ %max = arith.maxnumf %red_iter, %ld : f32
+ affine.yield %max : f32
+ }
+ affine.store %final_red, %out[%i] : memref<256xf32>
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @vecdim_reduction_maxnumf(
+// CHECK-SAME: %[[input:.*]]: memref<256x512xf32>,
+// CHECK-SAME: %[[output:.*]]: memref<256xf32>) {
+// CHECK: %[[cst:.*]] = arith.constant 0xFF800000 : f32
+// CHECK: affine.for %{{.*}} = 0 to 256 {
+// CHECK: %[[vzero:.*]] = arith.constant dense<0xFFC00000> : vector<128xf32>
+// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xf32>) {
+// CHECK: %[[poison:.*]] = ub.poison : f32
+// CHECK: %[[ld:.*]] = vector.transfer_read %[[input]]{{\[}}%{{.*}}, %{{.*}}], %[[poison]] : memref<256x512xf32>, vector<128xf32>
+// CHECK: %[[max:.*]] = arith.maxnumf %[[red_iter]], %[[ld]] : vector<128xf32>
+// CHECK: affine.yield %[[max]] : vector<128xf32>
+// CHECK: }
+// CHECK: %[[red_scalar:.*]] = vector.reduction <maxnumf>, %[[vred]] : vector<128xf32> into f32
+// CHECK: %[[final_red:.*]] = arith.maxnumf %[[red_scalar]], %[[cst]] : f32
+// CHECK: affine.store %[[final_red]], %[[output]]{{\[}}%{{.*}}] : memref<256xf32>
+// CHECK: }
+// CHECK: return
+// CHECK: }
// -----
diff --git a/mlir/test/Dialect/EmitC/invalid_ops.mlir b/mlir/test/Dialect/EmitC/invalid_ops.mlir
index 5f594fb..f285196 100644
--- a/mlir/test/Dialect/EmitC/invalid_ops.mlir
+++ b/mlir/test/Dialect/EmitC/invalid_ops.mlir
@@ -876,3 +876,41 @@ func.func @test_do(%arg0 : !emitc.ptr<i32>) {
return
}
+
+// -----
+
+func.func @test_for_none_block_argument(%arg0: index) {
+ // expected-error@+1 {{expected body to have a single block argument for the induction variable}}
+ "emitc.for"(%arg0, %arg0, %arg0) (
+ {
+ emitc.yield
+ }
+ ) : (index, index, index) -> ()
+ return
+}
+
+// -----
+
+func.func @test_for_more_than_one_block_argument(%arg0: index) {
+ // expected-error@+1 {{expected body to have a single block argument for the induction variable}}
+ "emitc.for"(%arg0, %arg0, %arg0) (
+ {
+ ^bb0(%i0 : index, %i1 : index):
+ emitc.yield
+ }
+ ) : (index, index, index) -> ()
+ return
+}
+
+// -----
+
+func.func @test_for_unmatch_type(%arg0: index) {
+ // expected-error@+1 {{expected induction variable to be same type as bounds}}
+ "emitc.for"(%arg0, %arg0, %arg0) (
+ {
+ ^bb0(%i0 : f32):
+ emitc.yield
+ }
+ ) : (index, index, index) -> ()
+ return
+}
diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir
index aaf9f80..49b6342 100644
--- a/mlir/test/Dialect/LLVMIR/invalid.mlir
+++ b/mlir/test/Dialect/LLVMIR/invalid.mlir
@@ -664,21 +664,21 @@ func.func @zero_non_llvm_type() {
// -----
func.func @nvvm_invalid_shfl_pred_1(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) {
- // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}}
+ // expected-error@+1 {{expected return type to be a two-element struct}}
%0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> i32
}
// -----
func.func @nvvm_invalid_shfl_pred_2(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) {
- // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}}
+ // expected-error@+1 {{expected return type to be a two-element struct}}
%0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32)>
}
// -----
func.func @nvvm_invalid_shfl_pred_3(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) {
- // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}}
+ // expected-error@+1 {{expected second element in the returned struct to be of type 'i1' but got 'i32' instead}}
%0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i32)>
}
diff --git a/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir b/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir
new file mode 100644
index 0000000..35f5e1b
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir
@@ -0,0 +1,90 @@
+// RUN: mlir-opt %s -split-input-file -verify-diagnostics
+
+// Test invalid target architecture (sm_100 instead of sm_100a)
+gpu.module @invalid_arch_sm_100 [#nvvm.target<chip = "sm_100">] {
+ func.func @convert_rs() {
+ %f1 = llvm.mlir.constant(1.0 : f32) : f32
+ %f2 = llvm.mlir.constant(2.0 : f32) : f32
+ %rbits = llvm.mlir.constant(0x12345678 : i32) : i32
+ // expected-error@+1 {{'nvvm.convert.f32x2.to.f16x2' op is not supported on sm_100}}
+ %res = nvvm.convert.f32x2.to.f16x2 %f1, %f2, %rbits : vector<2xf16>
+ return
+ }
+}
+
+// -----
+
+// Test that operations require stochastic rounding mode
+llvm.func @invalid_rnd_mode_f16x2(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> {
+ // expected-error@+1 {{Only RS rounding mode is supported for conversions from f32x2 to f16x2.}}
+ %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16>
+ llvm.return %res : vector<2xf16>
+}
+
+// -----
+
+llvm.func @invalid_rnd_mode_bf16x2(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> {
+ // expected-error@+1 {{Only RS rounding mode is supported for conversions from f32x2 to bf16x2.}}
+ %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16>
+ llvm.return %res : vector<2xbf16>
+}
+
+// -----
+
+// Test invalid destination types for f8x4 (should only accept f8E4M3FN, f8E5M2)
+llvm.func @invalid_dst_type_f8x4_e3m4(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // expected-error@+1 {{Only 'f8E4M3FN' and 'f8E5M2' types are supported for conversions from f32x4 to f8x4.}}
+ %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E3M4)
+ llvm.return %res : vector<4xi8>
+}
+
+// -----
+
+llvm.func @invalid_dst_type_f8x4_e8m0(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // expected-error@+1 {{Only 'f8E4M3FN' and 'f8E5M2' types are supported for conversions from f32x4 to f8x4.}}
+ %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E8M0FNU)
+ llvm.return %res : vector<4xi8>
+}
+
+// -----
+
+// Test invalid destination types for f6x4 (should only accept f6E2M3FN, f6E3M2FN)
+llvm.func @invalid_dst_type_f6x4_f8(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // expected-error@+1 {{Only 'f6E2M3FN' and 'f6E3M2FN' types are supported for conversions from f32x4 to f6x4.}}
+ %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E4M3FN)
+ llvm.return %res : vector<4xi8>
+}
+
+// -----
+
+// Test invalid destination type for f4x4 (should only accept f4E2M1FN)
+llvm.func @invalid_dst_type_f4x4_f6(%src : vector<4xf32>, %rbits : i32) -> i16 {
+ // expected-error@+1 {{Only 'f4E2M1FN' type is supported for conversions from f32x4 to f4x4.}}
+ %res = nvvm.convert.f32x4.to.f4x4 %src, %rbits : vector<4xf32> -> i16 (f6E2M3FN)
+ llvm.return %res : i16
+}
+
+// -----
+
+// Test invalid rounding modes for non-stochastic ops
+llvm.func @convert_float_to_tf32_rs_not_supported(%src : f32) -> i32 {
+ // expected-error @below {{Only {rn,rz,rna} rounding modes supported for ConvertFloatToTF32Op.}}
+ %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode<rs>}
+ llvm.return %res : i32
+}
+
+// -----
+
+llvm.func @convert_f32x2_to_f8x2_rs_not_supported(%a : f32, %b : f32) {
+ // expected-error @below {{Only RN rounding mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}}
+ %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode<rs>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E4M3FN)
+ llvm.return
+}
+
+// -----
+
+llvm.func @convert_bf16x2_to_f8x2_rs_not_supported(%src : vector<2xbf16>) {
+ // expected-error @below {{Only RZ and RP rounding modes are supported for conversions from bf16x2 to f8x2.}}
+ %res = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rs>} : vector<2xbf16> -> i16 (f8E8M0FNU)
+ llvm.return
+}
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index e703600..5e85759 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -664,6 +664,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
llvm.return
}
+llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
+ // CHECK-LABEL @rocdl.global.load.async.to.lds
+ // CHECK: rocdl.global.load.async.to.lds.b8 %{{.*}}, %{{.*}}, 0, 0
+ // CHECK: rocdl.global.load.async.to.lds.b32 %{{.*}}, %{{.*}}, 0, 0
+ // CHECK: rocdl.global.load.async.to.lds.b64 %{{.*}}, %{{.*}}, 0, 0
+ // CHECK: rocdl.global.load.async.to.lds.b128 %{{.*}}, %{{.*}}, 0, 0
+ rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : <1>, <3>
+ rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : <1>, <3>
+ rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : <1>, <3>
+ rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : <1>, <3>
+ llvm.return
+}
+
// CHECK-LABEL @rocdl.tensor.load.to.lds
llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
%dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {
diff --git a/mlir/test/Dialect/MemRef/ops.mlir b/mlir/test/Dialect/MemRef/ops.mlir
index a90c950..b1db99b 100644
--- a/mlir/test/Dialect/MemRef/ops.mlir
+++ b/mlir/test/Dialect/MemRef/ops.mlir
@@ -440,7 +440,8 @@ func.func @expand_collapse_shape_dynamic(%arg0: memref<?x?x?xf32>,
%arg4: index,
%arg5: index,
%arg6: index,
- %arg7: memref<4x?x4xf32>) {
+ %arg7: memref<4x?x4xf32>,
+ %arg8: memref<1x1x18x?xsi8, strided<[?, ?, ?, 1], offset: ?>>) {
// CHECK: memref.collapse_shape {{.*}} {{\[}}[0, 1], [2]]
// CHECK-SAME: memref<?x?x?xf32> into memref<?x?xf32>
%0 = memref.collapse_shape %arg0 [[0, 1], [2]] :
@@ -489,6 +490,10 @@ func.func @expand_collapse_shape_dynamic(%arg0: memref<?x?x?xf32>,
// CHECK: memref.expand_shape {{.*}} {{\[}}[0, 1], [2], [3, 4]]
%4 = memref.expand_shape %arg7 [[0, 1], [2], [3, 4]] output_shape [2, 2, %arg4, 2, 2]
: memref<4x?x4xf32> into memref<2x2x?x2x2xf32>
+
+// CHECK: memref.collapse_shape {{.*}} {{\[}}[0, 1], [2], [3]]
+// CHECK-SAME: memref<1x1x18x?xsi8, strided<[?, ?, ?, 1], offset: ?>> into memref<1x18x?xsi8, strided<[?, ?, 1], offset: ?>>
+ %5 = memref.collapse_shape %arg8 [[0, 1], [2], [3]] : memref<1x1x18x?xsi8, strided<[?, ?, ?, 1], offset: ?>> into memref<1x18x?xsi8, strided<[?, ?, 1], offset: ?>>
return
}
diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir
index 26b63fb..0e75894 100644
--- a/mlir/test/Dialect/OpenACC/invalid.mlir
+++ b/mlir/test/Dialect/OpenACC/invalid.mlir
@@ -492,6 +492,15 @@ func.func @fct1(%0 : !llvm.ptr) -> () {
// -----
+%i1 = arith.constant 1 : i32
+%i2 = arith.constant 10 : i32
+// expected-error@+1 {{unstructured acc.loop must not have induction variables}}
+acc.loop control(%iv : i32) = (%i1 : i32) to (%i2 : i32) step (%i1 : i32) {
+ acc.yield
+} attributes {independent = [#acc.device_type<none>], unstructured}
+
+// -----
+
// expected-error@+1 {{expect at least one of num, dim or static values}}
acc.loop gang({}) {
"test.openacc_dummy_op"() : () -> ()
diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir
index 042ee25..df8ab9b 100644
--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@@ -2143,6 +2143,20 @@ func.func @acc_loop_container() {
// -----
+func.func @acc_unstructured_loop() {
+ acc.loop {
+ acc.yield
+ } attributes {independent = [#acc.device_type<none>], unstructured}
+ return
+}
+
+// CHECK-LABEL: func.func @acc_unstructured_loop
+// CHECK: acc.loop
+// CHECK: acc.yield
+// CHECK: } attributes {independent = [#acc.device_type<none>], unstructured}
+
+// -----
+
// Test private recipe with data bounds for array slicing
acc.private.recipe @privatization_memref_slice : memref<10x10xf32> init {
^bb0(%arg0: memref<10x10xf32>, %bounds0: !acc.data_bounds_ty, %bounds1: !acc.data_bounds_ty):
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
index ebbe3ce..92f3537 100644
--- a/mlir/test/Dialect/XeGPU/invalid.mlir
+++ b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -451,7 +451,7 @@ func.func @store_scatter_offset_wi_1(%src: memref<?xf16>) {
%offsets = arith.constant dense<[0]> : vector<1xindex>
%mask = arith.constant dense<1>: vector<1xi1>
// expected-error@+1 {{Mask should match value except the chunk size dim}}
- xegpu.store %val, %src[%offsets], %mask
+ xegpu.store %val, %src[%offsets], %mask
: vector<4xf16>, memref<?xf16>, vector<1xindex>, vector<1xi1>
return
}
@@ -871,14 +871,6 @@ func.func @load_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>) {
}
// -----
-func.func @load_mem_desc_invalid_attr2(%arg0: !xegpu.mem_desc<16x64xf16>) {
- // expected-error@+1 {{subgroup_block_io are only allowed when result is a 1D VectorType.}}
- %data2 = xegpu.load_matrix %arg0[8, 8] <{subgroup_block_io}>: !xegpu.mem_desc<16x64xf16> -> vector<16x16xf16>
- return
-}
-
-
-// -----
func.func @store_mem_desc_mismatch_element_type(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<16x16xf32>) {
// expected-error@+1 {{failed to verify that all of {mem_desc, data} have same element type}}
xegpu.store_matrix %arg1, %arg0[8, 8] : vector<16x16xf32>, !xegpu.mem_desc<16x64xf16>
@@ -900,16 +892,25 @@ func.func @store_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>, %arg1: ve
}
// -----
-func.func @store_mem_desc_invalid_attr2(%arg0: !xegpu.mem_desc<16x64xf16>, %data: vector<16x16xf16>) {
- // expected-error@+1 {{subgroup_block_io are only allowed when result is a 1D VectorType.}}
- xegpu.store_matrix %data, %arg0[8, 8] <{subgroup_block_io}>: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16>
+func.func @simt_store_matrix_vector_nonlinear(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1]>>, %arg1: vector<2x16xf32>) {
+ // expected-error@+1 {{With subgroup_block_io, accessed data must be contiguous and coalesced}}
+ xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} :
+ vector<2x16xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1]>>
return
}
// -----
-func.func @store_mem_desc_invalid_attr2(%arg0: !xegpu.mem_desc<16x64xf16>, %data: vector<16x16xf16>) {
- // expected-error@+1 {{subgroup_block_io are only allowed when result is a 1D VectorType.}}
- xegpu.store_matrix %data, %arg0[8, 8] <{subgroup_block_io}>: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16>
+func.func @simt_store_matrix_vector_noncoalesced(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [1, 16]>>, %arg1: vector<16x2xf32>) {
+ // expected-error@+1 {{With subgroup_block_io, the distributed dimensions must be contiguous}}
+ xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>} :
+ vector<16x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [1, 16]>>
return
}
+// -----
+func.func @simt_store_matrix_vector_noncoalesced(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1], block = [1, 17]>>, %arg1: vector<16x2xf32>) {
+ // expected-error@+1 {{With subgroup_block_io, the block shape must match the lane layout}}
+ xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} :
+ vector<16x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1], block = [1, 17]>>
+ return
+}
diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
index 27a3dc3..8946d14 100644
--- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
+++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir
@@ -265,3 +265,66 @@ gpu.module @xevm_module{
gpu.return
}
}
+
+// -----
+// CHECK-LABEL: gpu.func @load_store_matrix_1({{.*}}) {
+// CHECK: %[[LAYOUT_X:.*]] = arith.constant 8 : index
+// CHECK: %[[LAYOUT_Y:.*]] = arith.constant 2 : index
+// CHECK: %[[LANE_ID:.*]] = gpu.lane_id
+// CHECK: %[[DELINEARIZED_LANE_Y:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
+// CHECK: %[[DELINEARIZED_LANE_X:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
+// CHECK: %[[LANE_Y_OFFSET:.*]] = index.remu %[[DELINEARIZED_LANE_Y]], %[[LAYOUT_Y]]
+// CHECK: %[[LANE_X_OFFSET:.*]] = index.remu %[[DELINEARIZED_LANE_X]], %[[LAYOUT_X]]
+// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<1x1xf32>
+// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : vector<1x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index
+gpu.module @xevm_module{
+ gpu.func @load_store_matrix_1(%arg0: !xegpu.mem_desc<32x32xf32>) {
+ %c0 = arith.constant 0 : index
+ %1 = xegpu.load_matrix %arg0[%c0, %c0] <{layout = #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>}> : !xegpu.mem_desc<32x32xf32>, index, index -> vector<2x8xf32>
+ xegpu.store_matrix %1, %arg0[%c0, %c0] <{layout = #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>}> : vector<2x8xf32>, !xegpu.mem_desc<32x32xf32>, index, index
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @load_store_matrix_2({{.*}}) {
+// CHECK: %[[DIST_UNIT_HEIGHT_X:.*]] = arith.constant 4 : index
+// CHECK: %[[DIST_UNIT_HEIGHT_Y:.*]] = arith.constant 8 : index
+// CHECK: %[[LANE_DATA_Y:.*]] = arith.constant 2 : index
+// CHECK: %[[USER_OFFSET_X:.*]] = arith.constant 1 : index
+// CHECK: %[[LANE_ID:.*]] = gpu.lane_id
+// CHECK: %[[DELINEARIZED_LANE_Y:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
+// CHECK: %[[DELINEARIZED_LANE_X:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
+// CHECK: %[[LANE_Y_OFFSET_1:.*]] = index.mul %[[DELINEARIZED_LANE_Y]], %[[LANE_DATA_Y]]
+// CHECK: %[[LANE_Y_OFFSET:.*]] = index.remu %[[LANE_Y_OFFSET_1]], %[[DIST_UNIT_HEIGHT_Y]]
+// CHECK: %[[LANE_X_OFFSET_1:.*]] = index.remu %[[DELINEARIZED_LANE_X]], %[[DIST_UNIT_HEIGHT_X]]
+// CHECK: %[[LANE_X_OFFSET:.*]] = index.add %[[LANE_X_OFFSET_1]], %[[USER_OFFSET_X]]
+// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<2x1xf32>
+// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : vector<2x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index
+gpu.module @xevm_module{
+ gpu.func @load_store_matrix_2(%arg0: !xegpu.mem_desc<32x32xf32>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %1 = xegpu.load_matrix %arg0[%c0, %c1] <{layout = #xegpu.layout<lane_layout = [4, 4], lane_data = [2, 1]>}> : !xegpu.mem_desc<32x32xf32>, index, index -> vector<8x4xf32>
+ xegpu.store_matrix %1, %arg0[%c0, %c1] <{layout = #xegpu.layout<lane_layout = [4, 4], lane_data = [2, 1]>}> : vector<8x4xf32>, !xegpu.mem_desc<32x32xf32>, index, index
+ gpu.return
+ }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @load_store_matrix_3({{.*}}) {
+// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%{{.*}}, %{{.*}}] <{subgroup_block_io}>:
+// CHECK-SAME: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<block = [16, 1], stride = [1, 32]>>, index, index -> vector<1x2xf32>
+// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%{{.*}}, %{{.*}}] <{subgroup_block_io}>:
+// CHECK-SAME: vector<1x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<block = [16, 1], stride = [1, 32]>>, index, index
+gpu.module @xevm_module{
+ gpu.func @load_store_matrix_3(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [16, 1]>>) {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %1 = xegpu.load_matrix %arg0[%c0, %c1] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} :
+ !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [16, 1]>>, index, index -> vector<16x2xf32>
+ xegpu.store_matrix %1, %arg0[%c0, %c1] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} :
+ vector<16x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [16, 1]>>, index, index
+ gpu.return
+ }
+}
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
index 60bd24a..1e4cf8d 100644
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -1308,7 +1308,7 @@ llvm.func @experimental_constrained_fpext(%s: f32, %v: vector<4xf32>) {
// CHECK-DAG: declare float @llvm.cos.f32(float)
// CHECK-DAG: declare <8 x float> @llvm.cos.v8f32(<8 x float>) #0
// CHECK-DAG: declare { float, float } @llvm.sincos.f32(float)
-// CHECK-DAG: declare { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float>) #0
+// CHECK-DAG: declare { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float>)
// CHECK-DAG: declare float @llvm.copysign.f32(float, float)
// CHECK-DAG: declare float @llvm.rint.f32(float)
// CHECK-DAG: declare double @llvm.rint.f64(double)
diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir
new file mode 100644
index 0000000..b5bb223
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir
@@ -0,0 +1,182 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// -----
+
+// Test valid architectures work
+
+// Valid case on sm_100a
+gpu.module @valid_f16x2_rs_sm_100a [#nvvm.target<chip = "sm_100a">] {
+ func.func @convert_rs() {
+ %f1 = llvm.mlir.constant(1.0 : f32) : f32
+ %f2 = llvm.mlir.constant(2.0 : f32) : f32
+ %rbits = llvm.mlir.constant(0x12345678 : i32) : i32
+ %res = nvvm.convert.f32x2.to.f16x2 %f1, %f2, %rbits : vector<2xf16>
+ return
+ }
+}
+
+// Valid case on sm_103a
+gpu.module @valid_bf16x2_rs_sm_103a [#nvvm.target<chip = "sm_103a">] {
+ func.func @convert_rs() {
+ %f1 = llvm.mlir.constant(1.0 : f32) : f32
+ %f2 = llvm.mlir.constant(2.0 : f32) : f32
+ %rbits = llvm.mlir.constant(0 : i32) : i32
+ %res = nvvm.convert.f32x2.to.bf16x2 %f1, %f2, %rbits : vector<2xbf16>
+ return
+ }
+}
+
+// -----
+
+// Test F32x2 -> F16x2 with stochastic rounding (.rs)
+
+// CHECK-LABEL: @convert_f32x2_to_f16x2_rs
+llvm.func @convert_f32x2_to_f16x2_rs(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> {
+ // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs(float %{{.*}}, float %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits : vector<2xf16>
+ llvm.return %res : vector<2xf16>
+}
+
+// CHECK-LABEL: @convert_f32x2_to_f16x2_rs_satfinite
+llvm.func @convert_f32x2_to_f16x2_rs_satfinite(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> {
+ // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {sat = #nvvm.sat_mode<satfinite>} : vector<2xf16>
+ llvm.return %res : vector<2xf16>
+}
+
+// CHECK-LABEL: @convert_f32x2_to_f16x2_rs_relu
+llvm.func @convert_f32x2_to_f16x2_rs_relu(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> {
+ // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu(float %{{.*}}, float %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {relu = true} : vector<2xf16>
+ llvm.return %res : vector<2xf16>
+}
+
+// CHECK-LABEL: @convert_f32x2_to_f16x2_rs_relu_satfinite
+llvm.func @convert_f32x2_to_f16x2_rs_relu_satfinite(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> {
+ // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {relu = true, sat = #nvvm.sat_mode<satfinite>} : vector<2xf16>
+ llvm.return %res : vector<2xf16>
+}
+
+// -----
+
+// Test F32x2 -> BF16x2 with stochastic rounding (.rs)
+
+// CHECK-LABEL: @convert_f32x2_to_bf16x2_rs
+llvm.func @convert_f32x2_to_bf16x2_rs(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> {
+ // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs(float %{{.*}}, float %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits : vector<2xbf16>
+ llvm.return %res : vector<2xbf16>
+}
+
+// CHECK-LABEL: @convert_f32x2_to_bf16x2_rs_satfinite
+llvm.func @convert_f32x2_to_bf16x2_rs_satfinite(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> {
+ // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16>
+ llvm.return %res : vector<2xbf16>
+}
+
+// CHECK-LABEL: @convert_f32x2_to_bf16x2_rs_relu
+llvm.func @convert_f32x2_to_bf16x2_rs_relu(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> {
+ // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu(float %{{.*}}, float %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {relu = true} : vector<2xbf16>
+ llvm.return %res : vector<2xbf16>
+}
+
+// CHECK-LABEL: @convert_f32x2_to_bf16x2_rs_relu_satfinite
+llvm.func @convert_f32x2_to_bf16x2_rs_relu_satfinite(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> {
+ // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {relu = true, sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16>
+ llvm.return %res : vector<2xbf16>
+}
+
+// -----
+
+// Test F32x4 -> F8x4 (E4M3) with stochastic rounding (.rs)
+
+// CHECK-LABEL: @convert_f32x4_to_f8x4_e4m3_rs
+llvm.func @convert_f32x4_to_f8x4_e4m3_rs(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E4M3FN)
+ llvm.return %res : vector<4xi8>
+}
+
+// CHECK-LABEL: @convert_f32x4_to_f8x4_e4m3_rs_relu
+llvm.func @convert_f32x4_to_f8x4_e4m3_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f8E4M3FN)
+ llvm.return %res : vector<4xi8>
+}
+
+// -----
+
+// Test F32x4 -> F8x4 (E5M2) with stochastic rounding (.rs)
+
+// CHECK-LABEL: @convert_f32x4_to_f8x4_e5m2_rs
+llvm.func @convert_f32x4_to_f8x4_e5m2_rs(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E5M2)
+ llvm.return %res : vector<4xi8>
+}
+
+// CHECK-LABEL: @convert_f32x4_to_f8x4_e5m2_rs_relu
+llvm.func @convert_f32x4_to_f8x4_e5m2_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f8E5M2)
+ llvm.return %res : vector<4xi8>
+}
+
+// -----
+
+// Test F32x4 -> F6x4 (E2M3) with stochastic rounding (.rs)
+
+// CHECK-LABEL: @convert_f32x4_to_f6x4_e2m3_rs
+llvm.func @convert_f32x4_to_f6x4_e2m3_rs(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f6E2M3FN)
+ llvm.return %res : vector<4xi8>
+}
+
+// CHECK-LABEL: @convert_f32x4_to_f6x4_e2m3_rs_relu
+llvm.func @convert_f32x4_to_f6x4_e2m3_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f6E2M3FN)
+ llvm.return %res : vector<4xi8>
+}
+
+// -----
+
+// Test F32x4 -> F6x4 (E3M2) with stochastic rounding (.rs)
+
+// CHECK-LABEL: @convert_f32x4_to_f6x4_e3m2_rs
+llvm.func @convert_f32x4_to_f6x4_e3m2_rs(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f6E3M2FN)
+ llvm.return %res : vector<4xi8>
+}
+
+// CHECK-LABEL: @convert_f32x4_to_f6x4_e3m2_rs_relu
+llvm.func @convert_f32x4_to_f6x4_e3m2_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> {
+ // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f6E3M2FN)
+ llvm.return %res : vector<4xi8>
+}
+
+// -----
+
+// Test F32x4 -> F4x4 (E2M1) with stochastic rounding (.rs)
+
+// CHECK-LABEL: @convert_f32x4_to_f4x4_e2m1_rs
+llvm.func @convert_f32x4_to_f4x4_e2m1_rs(%src : vector<4xf32>, %rbits : i32) -> i16 {
+ // CHECK: %{{.*}} = call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f4x4 %src, %rbits : vector<4xf32> -> i16 (f4E2M1FN)
+ llvm.return %res : i16
+}
+
+// CHECK-LABEL: @convert_f32x4_to_f4x4_e2m1_rs_relu
+llvm.func @convert_f32x4_to_f4x4_e2m1_rs_relu(%src : vector<4xf32>, %rbits : i32) -> i16 {
+ // CHECK: %{{.*}} = call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}})
+ %res = nvvm.convert.f32x4.to.f4x4 %src, %rbits {relu = true} : vector<4xf32> -> i16 (f4E2M1FN)
+ llvm.return %res : i16
+}
+
diff --git a/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir
new file mode 100644
index 0000000..a8a7430
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir
@@ -0,0 +1,49 @@
+// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
+
+// -----
+
+llvm.func @redux_sync_i32_with_abs(%value: i32, %offset: i32) {
+ // expected-error@+1 {{abs attribute is supported only for f32 type}}
+ %res = nvvm.redux.sync add %value, %offset {abs = true}: i32 -> i32
+ llvm.return
+}
+
+// -----
+
+llvm.func @redux_sync_i32_with_nan(%value: i32, %offset: i32) {
+ // expected-error@+1 {{nan attribute is supported only for f32 type}}
+ %res = nvvm.redux.sync add %value, %offset {nan = true}: i32 -> i32
+ llvm.return
+}
+
+// -----
+
+llvm.func @redux_sync_f32_with_invalid_kind_add(%value: f32, %offset: i32) {
+ // expected-error@+1 {{'add' redux kind unsupported with 'f32' type. Only supported type is 'i32'.}}
+ %res = nvvm.redux.sync add %value, %offset: f32 -> f32
+ llvm.return
+}
+
+// -----
+
+llvm.func @redux_sync_f32_with_invalid_kind_and(%value: f32, %offset: i32) {
+ // expected-error@+1 {{'and' redux kind unsupported with 'f32' type. Only supported type is 'i32'.}}
+ %res = nvvm.redux.sync and %value, %offset: f32 -> f32
+ llvm.return
+}
+
+// -----
+
+llvm.func @redux_sync_i32_with_invalid_kind_fmin(%value: i32, %offset: i32) {
+ // expected-error@+1 {{'fmin' redux kind unsupported with 'i32' type. Only supported type is 'f32'.}}
+ %res = nvvm.redux.sync fmin %value, %offset: i32 -> i32
+ llvm.return
+}
+
+// -----
+
+llvm.func @redux_sync_non_matching_types(%value: i32, %offset: i32) {
+ // expected-error@+1 {{failed to verify that all of {res, val} have same type}}
+ %res = nvvm.redux.sync add %value, %offset: i32 -> f32
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir
new file mode 100644
index 0000000..f2ccfe7
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir
@@ -0,0 +1,22 @@
+// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
+
+// -----
+
+func.func @nvvm_invalid_shfl_pred(%arg0 : i32, %arg1 : f32, %arg2 : i32, %arg3 : i32) {
+ // expected-error@+1 {{"return_value_and_is_valid" attribute must be specified when the return type is a struct type}}
+ %0 = nvvm.shfl.sync bfly %arg0, %arg1, %arg2, %arg3 : f32 -> !llvm.struct<(f32, i1)>
+}
+
+// -----
+
+func.func @nvvm_invalid_shfl_invalid_return_type_1(%arg0 : i32, %arg1 : f32, %arg2 : i32, %arg3 : i32) {
+ // expected-error@+1 {{expected return type to be of type 'f32' but got 'i32' instead}}
+ %0 = nvvm.shfl.sync bfly %arg0, %arg1, %arg2, %arg3 : f32 -> i32
+}
+
+// -----
+
+func.func @nvvm_invalid_shfl_invalid_return_type_2(%arg0 : i32, %arg1 : f32, %arg2 : i32, %arg3 : i32) {
+ // expected-error@+1 {{expected first element in the returned struct to be of type 'f32' but got 'i32' instead}}
+ %0 = nvvm.shfl.sync bfly %arg0, %arg1, %arg2, %arg3 {return_value_and_is_valid} : f32 -> !llvm.struct<(i32, i1)>
+}
diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir
new file mode 100644
index 0000000..1b93f20c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir
@@ -0,0 +1,9 @@
+// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
+
+// -----
+
+llvm.func @nvvm_tcgen05_ld_32x32b_offset(%tmemAddr : !llvm.ptr<6>, %offset : i64) -> () {
+ // expected-error@+1 {{offset argument is only supported for shape 16x32bx2}}
+ %ldv2 = nvvm.tcgen05.ld %tmemAddr, %offset { pack, shape = #nvvm.tcgen05_ldst_shape<shape_32x32b>} : vector<2 x i32>
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 8a848221..3fbd9e0 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -1040,6 +1040,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
llvm.return
}
+// CHECK-LABEL: rocdl.global.load.async.to.lds
+llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {
+ // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b8
+ rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
+ // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b32
+ rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
+ // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b64
+ rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
+ // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b128
+ rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3>
+ llvm.return
+}
+
// CHECK-LABEL: rocdl.tensor.load.to.lds
llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,
%dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) {
diff --git a/mlir/test/Target/SPIRV/group-ops.mlir b/mlir/test/Target/SPIRV/group-ops.mlir
index cf519cb..6f19b35 100644
--- a/mlir/test/Target/SPIRV/group-ops.mlir
+++ b/mlir/test/Target/SPIRV/group-ops.mlir
@@ -1,11 +1,13 @@
-// RUN: mlir-translate -no-implicit-module -test-spirv-roundtrip -split-input-file %s | FileCheck %s
+// RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip --split-input-file %s | FileCheck %s
// RUN: %if spirv-tools %{ rm -rf %t %}
// RUN: %if spirv-tools %{ mkdir %t %}
// RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv --split-input-file --spirv-save-validation-files-with-prefix=%t/module %s %}
// RUN: %if spirv-tools %{ spirv-val %t %}
-spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, Linkage, SubgroupBallotKHR, Groups, SubgroupBufferBlockIOINTEL, GroupNonUniformArithmetic, GroupUniformArithmeticKHR], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_shader_ballot, SPV_INTEL_subgroups, SPV_KHR_uniform_group_instructions]> {
+spirv.module Logical GLSL450 requires #spirv.vce<v1.3,
+ [Shader, Linkage, SubgroupBallotKHR, Groups, GroupNonUniformArithmetic, GroupUniformArithmeticKHR],
+ [SPV_KHR_storage_buffer_storage_class, SPV_KHR_shader_ballot, SPV_KHR_uniform_group_instructions]> {
// CHECK-LABEL: @subgroup_ballot
spirv.func @subgroup_ballot(%predicate: i1) -> vector<4xi32> "None" {
// CHECK: %{{.*}} = spirv.KHR.SubgroupBallot %{{.*}}: vector<4xi32>
@@ -24,30 +26,6 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, Linkage, Subgrou
%0 = spirv.GroupBroadcast <Workgroup> %value, %localid : f32, vector<3xi32>
spirv.ReturnValue %0: f32
}
- // CHECK-LABEL: @subgroup_block_read_intel
- spirv.func @subgroup_block_read_intel(%ptr : !spirv.ptr<i32, StorageBuffer>) -> i32 "None" {
- // CHECK: spirv.INTEL.SubgroupBlockRead %{{.*}} : !spirv.ptr<i32, StorageBuffer> -> i32
- %0 = spirv.INTEL.SubgroupBlockRead %ptr : !spirv.ptr<i32, StorageBuffer> -> i32
- spirv.ReturnValue %0: i32
- }
- // CHECK-LABEL: @subgroup_block_read_intel_vector
- spirv.func @subgroup_block_read_intel_vector(%ptr : !spirv.ptr<i32, StorageBuffer>) -> vector<3xi32> "None" {
- // CHECK: spirv.INTEL.SubgroupBlockRead %{{.*}} : !spirv.ptr<i32, StorageBuffer> -> vector<3xi32>
- %0 = spirv.INTEL.SubgroupBlockRead %ptr : !spirv.ptr<i32, StorageBuffer> -> vector<3xi32>
- spirv.ReturnValue %0: vector<3xi32>
- }
- // CHECK-LABEL: @subgroup_block_write_intel
- spirv.func @subgroup_block_write_intel(%ptr : !spirv.ptr<i32, StorageBuffer>, %value: i32) -> () "None" {
- // CHECK: spirv.INTEL.SubgroupBlockWrite %{{.*}}, %{{.*}} : i32
- spirv.INTEL.SubgroupBlockWrite "StorageBuffer" %ptr, %value : i32
- spirv.Return
- }
- // CHECK-LABEL: @subgroup_block_write_intel_vector
- spirv.func @subgroup_block_write_intel_vector(%ptr : !spirv.ptr<i32, StorageBuffer>, %value: vector<3xi32>) -> () "None" {
- // CHECK: spirv.INTEL.SubgroupBlockWrite %{{.*}}, %{{.*}} : vector<3xi32>
- spirv.INTEL.SubgroupBlockWrite "StorageBuffer" %ptr, %value : vector<3xi32>
- spirv.Return
- }
// CHECK-LABEL: @group_iadd
spirv.func @group_iadd(%value: i32) -> i32 "None" {
// CHECK: spirv.GroupIAdd <Workgroup> <Reduce> %{{.*}} : i32
diff --git a/mlir/test/Target/SPIRV/subgroup-block-intel.mlir b/mlir/test/Target/SPIRV/subgroup-block-intel.mlir
new file mode 100644
index 0000000..14060e6
--- /dev/null
+++ b/mlir/test/Target/SPIRV/subgroup-block-intel.mlir
@@ -0,0 +1,34 @@
+// RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip %s | FileCheck %s
+
+// RUN: %if spirv-tools %{ rm -rf %t %}
+// RUN: %if spirv-tools %{ mkdir %t %}
+// RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv --spirv-save-validation-files-with-prefix=%t/module %s %}
+// RUN: %if spirv-tools %{ spirv-val %t %}
+
+spirv.module Physical64 GLSL450 requires #spirv.vce<v1.3, [Addresses, Shader, Linkage, SubgroupBufferBlockIOINTEL],
+ [SPV_KHR_storage_buffer_storage_class, SPV_INTEL_subgroups]> {
+ // CHECK-LABEL: @subgroup_block_read_intel
+ spirv.func @subgroup_block_read_intel(%ptr : !spirv.ptr<i32, StorageBuffer>) -> i32 "None" {
+ // CHECK: spirv.INTEL.SubgroupBlockRead %{{.*}} : !spirv.ptr<i32, StorageBuffer> -> i32
+ %0 = spirv.INTEL.SubgroupBlockRead %ptr : !spirv.ptr<i32, StorageBuffer> -> i32
+ spirv.ReturnValue %0: i32
+ }
+ // CHECK-LABEL: @subgroup_block_read_intel_vector
+ spirv.func @subgroup_block_read_intel_vector(%ptr : !spirv.ptr<i32, StorageBuffer>) -> vector<3xi32> "None" {
+ // CHECK: spirv.INTEL.SubgroupBlockRead %{{.*}} : !spirv.ptr<i32, StorageBuffer> -> vector<3xi32>
+ %0 = spirv.INTEL.SubgroupBlockRead %ptr : !spirv.ptr<i32, StorageBuffer> -> vector<3xi32>
+ spirv.ReturnValue %0: vector<3xi32>
+ }
+ // CHECK-LABEL: @subgroup_block_write_intel
+ spirv.func @subgroup_block_write_intel(%ptr : !spirv.ptr<i32, StorageBuffer>, %value: i32) -> () "None" {
+ // CHECK: spirv.INTEL.SubgroupBlockWrite %{{.*}}, %{{.*}} : i32
+ spirv.INTEL.SubgroupBlockWrite "StorageBuffer" %ptr, %value : i32
+ spirv.Return
+ }
+ // CHECK-LABEL: @subgroup_block_write_intel_vector
+ spirv.func @subgroup_block_write_intel_vector(%ptr : !spirv.ptr<i32, StorageBuffer>, %value: vector<3xi32>) -> () "None" {
+ // CHECK: spirv.INTEL.SubgroupBlockWrite %{{.*}}, %{{.*}} : vector<3xi32>
+ spirv.INTEL.SubgroupBlockWrite "StorageBuffer" %ptr, %value : vector<3xi32>
+ spirv.Return
+ }
+}
diff --git a/mlir/test/Transforms/test-legalizer-no-materializations.mlir b/mlir/test/Transforms/test-legalizer-no-materializations.mlir
new file mode 100644
index 0000000..82dd742
--- /dev/null
+++ b/mlir/test/Transforms/test-legalizer-no-materializations.mlir
@@ -0,0 +1,67 @@
+// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=0 build-materializations=0 attach-debug-materialization-kind=1" -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-KIND
+
+// CHECK-LABEL: func @dropped_input_in_use
+// CHECK-KIND-LABEL: func @dropped_input_in_use
+func.func @dropped_input_in_use(%arg: i16, %arg2: i64) {
+ // CHECK-NEXT: %[[cast:.*]] = "test.cast"() : () -> i16
+ // CHECK-NEXT: "work"(%[[cast]]) : (i16)
+ // CHECK-KIND-NEXT: %[[cast:.*]] = builtin.unrealized_conversion_cast to i16 {__kind__ = "source"}
+ // CHECK-KIND-NEXT: "work"(%[[cast]]) : (i16)
+ // expected-remark@+1 {{op 'work' is not legalizable}}
+ "work"(%arg) : (i16) -> ()
+}
+
+// -----
+
+// CHECK-KIND-LABEL: func @test_lookup_without_converter
+// CHECK-KIND: %[[producer:.*]] = "test.valid_producer"() : () -> i16
+// CHECK-KIND: %[[cast:.*]] = builtin.unrealized_conversion_cast %[[producer]] : i16 to f64 {__kind__ = "target"}
+// CHECK-KIND: "test.valid_consumer"(%[[cast]]) : (f64) -> ()
+// CHECK-KIND: "test.valid_consumer"(%[[producer]]) : (i16) -> ()
+func.func @test_lookup_without_converter() {
+ %0 = "test.replace_with_valid_producer"() {type = i16} : () -> (i64)
+ "test.replace_with_valid_consumer"(%0) {with_converter} : (i64) -> ()
+ // Make sure that the second "replace_with_valid_consumer" lowering does not
+ // lookup the materialization that was created for the above op.
+ "test.replace_with_valid_consumer"(%0) : (i64) -> ()
+ // expected-remark@+1 {{op 'func.return' is not legalizable}}
+ return
+}
+
+// -----
+
+// CHECK-LABEL: func @remap_moved_region_args
+func.func @remap_moved_region_args() {
+ // CHECK-NEXT: return
+ // CHECK-NEXT: ^bb1(%[[arg0:.*]]: i64, %[[arg1:.*]]: i16, %[[arg2:.*]]: i64, %[[arg3:.*]]: f32):
+ // CHECK-NEXT: %[[cast1:.*]]:2 = builtin.unrealized_conversion_cast %[[arg3]] : f32 to f16, f16
+ // CHECK-NEXT: %[[cast2:.*]] = builtin.unrealized_conversion_cast %[[arg2]] : i64 to f64
+ // CHECK-NEXT: %[[cast3:.*]] = builtin.unrealized_conversion_cast %[[arg0]] : i64 to f64
+ // CHECK-NEXT: %[[cast4:.*]] = "test.cast"(%[[cast1]]#0, %[[cast1]]#1) : (f16, f16) -> f32
+ // CHECK-NEXT: "test.valid"(%[[cast3]], %[[cast2]], %[[cast4]]) : (f64, f64, f32)
+ "test.region"() ({
+ ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32):
+ "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> ()
+ }) : () -> ()
+ // expected-remark@+1 {{op 'func.return' is not legalizable}}
+ return
+}
+
+// -----
+
+// CHECK-LABEL: func @remap_cloned_region_args
+func.func @remap_cloned_region_args() {
+ // CHECK-NEXT: return
+ // CHECK-NEXT: ^bb1(%[[arg0:.*]]: i64, %[[arg1:.*]]: i16, %[[arg2:.*]]: i64, %[[arg3:.*]]: f32):
+ // CHECK-NEXT: %[[cast1:.*]]:2 = builtin.unrealized_conversion_cast %[[arg3]] : f32 to f16, f16
+ // CHECK-NEXT: %[[cast2:.*]] = builtin.unrealized_conversion_cast %[[arg2]] : i64 to f64
+ // CHECK-NEXT: %[[cast3:.*]] = builtin.unrealized_conversion_cast %[[arg0]] : i64 to f64
+ // CHECK-NEXT: %[[cast4:.*]] = "test.cast"(%[[cast1]]#0, %[[cast1]]#1) : (f16, f16) -> f32
+ // CHECK-NEXT: "test.valid"(%[[cast3]], %[[cast2]], %[[cast4]]) : (f64, f64, f32)
+ "test.region"() ({
+ ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32):
+ "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> ()
+ }) {legalizer.should_clone} : () -> ()
+ // expected-remark@+1 {{op 'func.return' is not legalizable}}
+ return
+}
diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir
index 94c5bb4..7c43bb7 100644
--- a/mlir/test/Transforms/test-legalizer.mlir
+++ b/mlir/test/Transforms/test-legalizer.mlir
@@ -1,7 +1,6 @@
// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=1" -verify-diagnostics %s | FileCheck %s
// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=1" -verify-diagnostics -profile-actions-to=- %s | FileCheck %s --check-prefix=CHECK-PROFILER
// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=0" -verify-diagnostics %s | FileCheck %s
-// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=0 build-materializations=0 attach-debug-materialization-kind=1" -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-KIND
// CHECK-PROFILER: "name": "pass-execution", "cat": "PERF", "ph": "B"
// CHECK-PROFILER: "name": "apply-conversion", "cat": "PERF", "ph": "B"
@@ -146,36 +145,6 @@ func.func @no_remap_nested() {
// -----
-// CHECK-LABEL: func @remap_moved_region_args
-func.func @remap_moved_region_args() {
- // CHECK-NEXT: return
- // CHECK-NEXT: ^bb1(%{{.*}}: f64, %{{.*}}: f64, %{{.*}}: f16, %{{.*}}: f16):
- // CHECK-NEXT: "test.cast"{{.*}} : (f16, f16) -> f32
- // CHECK-NEXT: "test.valid"{{.*}} : (f64, f64, f32)
- "test.region"() ({
- ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32):
- "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> ()
- }) : () -> ()
- // expected-remark@+1 {{op 'func.return' is not legalizable}}
- return
-}
-
-// -----
-
-// CHECK-LABEL: func @remap_cloned_region_args
-func.func @remap_cloned_region_args() {
- // CHECK-NEXT: return
- // CHECK-NEXT: ^bb1(%{{.*}}: f64, %{{.*}}: f64, %{{.*}}: f16, %{{.*}}: f16):
- // CHECK-NEXT: "test.cast"{{.*}} : (f16, f16) -> f32
- // CHECK-NEXT: "test.valid"{{.*}} : (f64, f64, f32)
- "test.region"() ({
- ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32):
- "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> ()
- }) {legalizer.should_clone} : () -> ()
- // expected-remark@+1 {{op 'func.return' is not legalizable}}
- return
-}
-
// CHECK-LABEL: func @remap_drop_region
func.func @remap_drop_region() {
// CHECK-NEXT: return
@@ -191,12 +160,9 @@ func.func @remap_drop_region() {
// -----
// CHECK-LABEL: func @dropped_input_in_use
-// CHECK-KIND-LABEL: func @dropped_input_in_use
func.func @dropped_input_in_use(%arg: i16, %arg2: i64) {
// CHECK-NEXT: %[[cast:.*]] = "test.cast"() : () -> i16
// CHECK-NEXT: "work"(%[[cast]]) : (i16)
- // CHECK-KIND-NEXT: %[[cast:.*]] = builtin.unrealized_conversion_cast to i16 {__kind__ = "source"}
- // CHECK-KIND-NEXT: "work"(%[[cast]]) : (i16)
// expected-remark@+1 {{op 'work' is not legalizable}}
"work"(%arg) : (i16) -> ()
}
@@ -452,11 +418,6 @@ func.func @test_multiple_1_to_n_replacement() {
// CHECK: %[[cast:.*]] = "test.cast"(%[[producer]]) : (i16) -> f64
// CHECK: "test.valid_consumer"(%[[cast]]) : (f64) -> ()
// CHECK: "test.valid_consumer"(%[[producer]]) : (i16) -> ()
-// CHECK-KIND-LABEL: func @test_lookup_without_converter
-// CHECK-KIND: %[[producer:.*]] = "test.valid_producer"() : () -> i16
-// CHECK-KIND: %[[cast:.*]] = builtin.unrealized_conversion_cast %[[producer]] : i16 to f64 {__kind__ = "target"}
-// CHECK-KIND: "test.valid_consumer"(%[[cast]]) : (f64) -> ()
-// CHECK-KIND: "test.valid_consumer"(%[[producer]]) : (i16) -> ()
func.func @test_lookup_without_converter() {
%0 = "test.replace_with_valid_producer"() {type = i16} : () -> (i64)
"test.replace_with_valid_consumer"(%0) {with_converter} : (i64) -> ()
diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
index fd2b943..12edecc 100644
--- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp
+++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp
@@ -1553,8 +1553,7 @@ struct TestLegalizePatternDriver
[](Type type) { return type.isF32(); });
});
target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp op) {
- return converter.isSignatureLegal(op.getFunctionType()) &&
- converter.isLegal(&op.getBody());
+ return converter.isSignatureLegal(op.getFunctionType());
});
target.addDynamicallyLegalOp<func::CallOp>(
[&](func::CallOp op) { return converter.isLegal(op); });
@@ -2156,8 +2155,7 @@ struct TestTypeConversionDriver
recursiveType.getName() == "outer_converted_type");
});
target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp op) {
- return converter.isSignatureLegal(op.getFunctionType()) &&
- converter.isLegal(&op.getBody());
+ return converter.isSignatureLegal(op.getFunctionType());
});
target.addDynamicallyLegalOp<TestCastOp>([&](TestCastOp op) {
// Allow casts from F64 to F32.
diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
index 76d4611..93d5144 100644
--- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
+++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp
@@ -200,7 +200,8 @@ class TestStepOpPattern : public OpConversionPattern<vector::StepOp> {
Value sgId =
gpu::SubgroupIdOp::create(rewriter, loc, /*upper_bound=*/nullptr);
- auto maybeOffsets = sliceAttr.getOffsets(rewriter, loc, sgId, wgShape);
+ auto maybeOffsets =
+ sliceAttr.computeDistributedCoords(rewriter, loc, sgId, wgShape);
if (failed(maybeOffsets))
return failure();
diff --git a/mlir/test/python/CMakeLists.txt b/mlir/test/python/CMakeLists.txt
index 2c12381..c81f75f 100644
--- a/mlir/test/python/CMakeLists.txt
+++ b/mlir/test/python/CMakeLists.txt
@@ -11,7 +11,7 @@ add_public_tablegen_target(MLIRPythonTestIncGen)
add_subdirectory(lib)
-set(MLIR_PYTHON_TEST_DEPENDS MLIRPythonModules mlir-runner)
+set(MLIR_PYTHON_TEST_DEPENDS MLIRPythonModules mlir-runner mlir_c_runner_utils mlir_runner_utils)
if(NOT MLIR_STANDALONE_BUILD)
list(APPEND MLIR_PYTHON_TEST_DEPENDS FileCheck count not)
endif()