diff options
Diffstat (limited to 'mlir/test')
24 files changed, 774 insertions, 91 deletions
diff --git a/mlir/test/Conversion/ConvertToSPIRV/vector.mlir b/mlir/test/Conversion/ConvertToSPIRV/vector.mlir index a75f30d..cd8cfc8 100644 --- a/mlir/test/Conversion/ConvertToSPIRV/vector.mlir +++ b/mlir/test/Conversion/ConvertToSPIRV/vector.mlir @@ -275,6 +275,42 @@ func.func @reduction_minimumf(%v : vector<3xf32>, %s: f32) -> f32 {  // ----- +// CHECK-LABEL:   spirv.func @reduction_minnumf( +// CHECK-SAME:      %[[V:.*]]: vector<3xf32>, +// CHECK-SAME:      %[[S:.*]]: f32) -> f32 "None" { +// CHECK:           %[[S0:.*]] = spirv.CompositeExtract %[[V]][0 : i32] : vector<3xf32> +// CHECK:           %[[S1:.*]] = spirv.CompositeExtract %[[V]][1 : i32] : vector<3xf32> +// CHECK:           %[[S2:.*]] = spirv.CompositeExtract %[[V]][2 : i32] : vector<3xf32> +// CHECK:           %[[MIN0:.*]] = spirv.GL.FMin %[[S0]], %[[S1]] : f32 +// CHECK:           %[[MIN1:.*]] = spirv.GL.FMin %[[MIN0]], %[[S2]] : f32 +// CHECK:           %[[MIN2:.*]] = spirv.GL.FMin %[[MIN1]], %[[S]] : f32 +// CHECK:           spirv.ReturnValue %[[MIN2]] : f32 +// CHECK:         } +func.func @reduction_minnumf(%v : vector<3xf32>, %s: f32) -> f32 { +  %reduce = vector.reduction <minnumf>, %v, %s : vector<3xf32> into f32 +  return %reduce : f32 +} + +// ----- + +// CHECK-LABEL:   spirv.func @reduction_maxnumf( +// CHECK-SAME:      %[[V:.*]]: vector<3xf32>, +// CHECK-SAME:      %[[S:.*]]: f32) -> f32 "None" { +// CHECK:           %[[S0:.*]] = spirv.CompositeExtract %[[V]][0 : i32] : vector<3xf32> +// CHECK:           %[[S1:.*]] = spirv.CompositeExtract %[[V]][1 : i32] : vector<3xf32> +// CHECK:           %[[S2:.*]] = spirv.CompositeExtract %[[V]][2 : i32] : vector<3xf32> +// CHECK:           %[[MAX0:.*]] = spirv.GL.FMax %[[S0]], %[[S1]] : f32 +// CHECK:           %[[MAX1:.*]] = spirv.GL.FMax %[[MAX0]], %[[S2]] : f32 +// CHECK:           %[[MAX2:.*]] = spirv.GL.FMax %[[MAX1]], %[[S]] : f32 +// CHECK:           spirv.ReturnValue %[[MAX2]] : f32 +// CHECK:         } +func.func @reduction_maxnumf(%v : vector<3xf32>, %s: f32) -> f32 { +  %reduce = vector.reduction <maxnumf>, %v, %s : vector<3xf32> into f32 +  return %reduce : f32 +} + +// ----- +  // CHECK-LABEL: func @reduction_maxsi  //  CHECK-SAME: (%[[V:.+]]: vector<3xi32>, %[[S:.+]]: i32)  //       CHECK:   %[[S0:.+]] = spirv.CompositeExtract %[[V]][0 : i32] : vector<3xi32> diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir index b616632..b062736 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir @@ -243,6 +243,106 @@ func.func @vecdim_reduction_ori(%in: memref<256x512xi32>, %out: memref<256xi32>)  // CHECK:         affine.store %[[final_red]], %{{.*}} : memref<256xi32>  // CHECK:       } +// ----- + +func.func @vecdim_reduction_xori(%in: memref<256x512xi32>, %out: memref<256xi32>) { + %cst = arith.constant 0 : i32 + affine.for %i = 0 to 256 { +   %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) { +     %ld = affine.load %in[%i, %j] : memref<256x512xi32> +     %xor = arith.xori %red_iter, %ld : i32 +     affine.yield %xor : i32 +   } +   affine.store %final_red, %out[%i] : memref<256xi32> + } + return +} + +// CHECK-LABEL:   func.func @vecdim_reduction_xori( +// CHECK-SAME:      %[[input:.*]]: memref<256x512xi32>, +// CHECK-SAME:      %[[output:.*]]: memref<256xi32>) { +// CHECK:           %[[cst:.*]] = arith.constant 0 : i32 +// CHECK:           affine.for %{{.*}} = 0 to 256 { +// CHECK:             %[[vzero:.*]] = arith.constant dense<0> : vector<128xi32> +// CHECK:             %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xi32>) { +// CHECK:               %[[poison:.*]] = ub.poison : i32 +// CHECK:               %[[ld:.*]] = vector.transfer_read %[[input]]{{\[}}%{{.*}}, %{{.*}}], %[[poison]] : memref<256x512xi32>, vector<128xi32> +// CHECK:               %[[xor:.*]] = arith.xori %[[red_iter]], %[[ld]] : vector<128xi32> +// CHECK:               affine.yield %[[xor]] : vector<128xi32> +// CHECK:             } +// CHECK:             %[[final_red:.*]] = vector.reduction <xor>, %[[vred]] : vector<128xi32> into i32 +// CHECK:             affine.store %[[final_red]], %[[output]]{{\[}}%{{.*}}] : memref<256xi32> +// CHECK:           } +// CHECK:           return +// CHECK:         } + +// ----- + +func.func @vecdim_reduction_minnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) { + %cst = arith.constant 0xFF800000 : f32 + affine.for %i = 0 to 256 { +   %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) { +     %ld = affine.load %in[%i, %j] : memref<256x512xf32> +     %min = arith.minnumf %red_iter, %ld : f32 +     affine.yield %min : f32 +   } +   affine.store %final_red, %out[%i] : memref<256xf32> + } + return +} + +// CHECK-LABEL:   func.func @vecdim_reduction_minnumf( +// CHECK-SAME:      %[[input:.*]]: memref<256x512xf32>, +// CHECK-SAME:      %[[output:.*]]: memref<256xf32>) { +// CHECK:           %[[cst:.*]] = arith.constant 0xFF800000 : f32 +// CHECK:           affine.for %{{.*}} = 0 to 256 { +// CHECK:             %[[vzero:.*]] = arith.constant dense<0x7FC00000> : vector<128xf32> +// CHECK:             %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xf32>) { +// CHECK:               %[[poison:.*]] = ub.poison : f32 +// CHECK:               %[[ld:.*]] = vector.transfer_read %[[input]]{{\[}}%{{.*}}, %{{.*}}], %[[poison]] : memref<256x512xf32>, vector<128xf32> +// CHECK:               %[[min:.*]] = arith.minnumf %[[red_iter]], %[[ld]] : vector<128xf32> +// CHECK:               affine.yield %[[min]] : vector<128xf32> +// CHECK:             } +// CHECK:             %[[red_scalar:.*]] = vector.reduction <minnumf>, %[[vred]] : vector<128xf32> into f32 +// CHECK:             %[[final_red:.*]] = arith.minnumf %[[red_scalar]], %[[cst]] : f32 +// CHECK:             affine.store %[[final_red]], %[[output]]{{\[}}%{{.*}}] : memref<256xf32> +// CHECK:           } +// CHECK:           return +// CHECK:         } + +// ----- + +func.func @vecdim_reduction_maxnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) { + %cst = arith.constant 0xFF800000 : f32 + affine.for %i = 0 to 256 { +   %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) { +     %ld = affine.load %in[%i, %j] : memref<256x512xf32> +     %max = arith.maxnumf %red_iter, %ld : f32 +     affine.yield %max : f32 +   } +   affine.store %final_red, %out[%i] : memref<256xf32> + } + return +} + +// CHECK-LABEL:   func.func @vecdim_reduction_maxnumf( +// CHECK-SAME:      %[[input:.*]]: memref<256x512xf32>, +// CHECK-SAME:      %[[output:.*]]: memref<256xf32>) { +// CHECK:           %[[cst:.*]] = arith.constant 0xFF800000 : f32 +// CHECK:           affine.for %{{.*}} = 0 to 256 { +// CHECK:             %[[vzero:.*]] = arith.constant dense<0xFFC00000> : vector<128xf32> +// CHECK:             %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vzero]]) -> (vector<128xf32>) { +// CHECK:               %[[poison:.*]] = ub.poison : f32 +// CHECK:               %[[ld:.*]] = vector.transfer_read %[[input]]{{\[}}%{{.*}}, %{{.*}}], %[[poison]] : memref<256x512xf32>, vector<128xf32> +// CHECK:               %[[max:.*]] = arith.maxnumf %[[red_iter]], %[[ld]] : vector<128xf32> +// CHECK:               affine.yield %[[max]] : vector<128xf32> +// CHECK:             } +// CHECK:             %[[red_scalar:.*]] = vector.reduction <maxnumf>, %[[vred]] : vector<128xf32> into f32 +// CHECK:             %[[final_red:.*]] = arith.maxnumf %[[red_scalar]], %[[cst]] : f32 +// CHECK:             affine.store %[[final_red]], %[[output]]{{\[}}%{{.*}}] : memref<256xf32> +// CHECK:           } +// CHECK:           return +// CHECK:         }  // ----- diff --git a/mlir/test/Dialect/EmitC/invalid_ops.mlir b/mlir/test/Dialect/EmitC/invalid_ops.mlir index 5f594fb..f285196 100644 --- a/mlir/test/Dialect/EmitC/invalid_ops.mlir +++ b/mlir/test/Dialect/EmitC/invalid_ops.mlir @@ -876,3 +876,41 @@ func.func @test_do(%arg0 : !emitc.ptr<i32>) {    return  } + +// ----- + +func.func @test_for_none_block_argument(%arg0: index) { +  // expected-error@+1 {{expected body to have a single block argument for the induction variable}} +  "emitc.for"(%arg0, %arg0, %arg0) ( +    { +      emitc.yield +    } +  ) : (index, index, index) -> () +  return +} + +// ----- + +func.func @test_for_more_than_one_block_argument(%arg0: index) { +  // expected-error@+1 {{expected body to have a single block argument for the induction variable}} +  "emitc.for"(%arg0, %arg0, %arg0) ( +    { +    ^bb0(%i0 : index, %i1 : index): +      emitc.yield +    } +  ) : (index, index, index) -> () +  return +} + +// ----- + +func.func @test_for_unmatch_type(%arg0: index) { +  // expected-error@+1 {{expected induction variable to be same type as bounds}} +  "emitc.for"(%arg0, %arg0, %arg0) ( +    { +    ^bb0(%i0 : f32): +      emitc.yield +    } +  ) : (index, index, index) -> () +  return +} diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index aaf9f80..49b6342 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -664,21 +664,21 @@ func.func @zero_non_llvm_type() {  // -----  func.func @nvvm_invalid_shfl_pred_1(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) { -  // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}} +  // expected-error@+1 {{expected return type to be a two-element struct}}    %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> i32  }  // -----  func.func @nvvm_invalid_shfl_pred_2(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) { -  // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}} +  // expected-error@+1 {{expected return type to be a two-element struct}}    %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32)>  }  // -----  func.func @nvvm_invalid_shfl_pred_3(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) { -  // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}} +  // expected-error@+1 {{expected second element in the returned struct to be of type 'i1' but got 'i32' instead}}    %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i32)>  } diff --git a/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir b/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir new file mode 100644 index 0000000..35f5e1b --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/nvvm/invalid-convert-stochastic-rounding.mlir @@ -0,0 +1,90 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +// Test invalid target architecture (sm_100 instead of sm_100a) +gpu.module @invalid_arch_sm_100 [#nvvm.target<chip = "sm_100">] { +  func.func @convert_rs() { +    %f1 = llvm.mlir.constant(1.0 : f32) : f32 +    %f2 = llvm.mlir.constant(2.0 : f32) : f32 +    %rbits = llvm.mlir.constant(0x12345678 : i32) : i32 +    // expected-error@+1 {{'nvvm.convert.f32x2.to.f16x2' op is not supported on sm_100}} +    %res = nvvm.convert.f32x2.to.f16x2 %f1, %f2, %rbits : vector<2xf16> +    return +  } +} + +// ----- + +// Test that operations require stochastic rounding mode +llvm.func @invalid_rnd_mode_f16x2(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> { +  // expected-error@+1 {{Only RS rounding mode is supported for conversions from f32x2 to f16x2.}} +  %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {rnd = #nvvm.fp_rnd_mode<rn>} : vector<2xf16> +  llvm.return %res : vector<2xf16> +} + +// ----- + +llvm.func @invalid_rnd_mode_bf16x2(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> { +  // expected-error@+1 {{Only RS rounding mode is supported for conversions from f32x2 to bf16x2.}} +  %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {rnd = #nvvm.fp_rnd_mode<rz>} : vector<2xbf16> +  llvm.return %res : vector<2xbf16> +} + +// ----- + +// Test invalid destination types for f8x4 (should only accept f8E4M3FN, f8E5M2) +llvm.func @invalid_dst_type_f8x4_e3m4(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // expected-error@+1 {{Only 'f8E4M3FN' and 'f8E5M2' types are supported for conversions from f32x4 to f8x4.}} +  %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E3M4) +  llvm.return %res : vector<4xi8> +} + +// ----- + +llvm.func @invalid_dst_type_f8x4_e8m0(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // expected-error@+1 {{Only 'f8E4M3FN' and 'f8E5M2' types are supported for conversions from f32x4 to f8x4.}} +  %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E8M0FNU) +  llvm.return %res : vector<4xi8> +} + +// ----- + +// Test invalid destination types for f6x4 (should only accept f6E2M3FN, f6E3M2FN) +llvm.func @invalid_dst_type_f6x4_f8(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // expected-error@+1 {{Only 'f6E2M3FN' and 'f6E3M2FN' types are supported for conversions from f32x4 to f6x4.}} +  %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E4M3FN) +  llvm.return %res : vector<4xi8> +} + +// ----- + +// Test invalid destination type for f4x4 (should only accept f4E2M1FN) +llvm.func @invalid_dst_type_f4x4_f6(%src : vector<4xf32>, %rbits : i32) -> i16 { +  // expected-error@+1 {{Only 'f4E2M1FN' type is supported for conversions from f32x4 to f4x4.}} +  %res = nvvm.convert.f32x4.to.f4x4 %src, %rbits : vector<4xf32> -> i16 (f6E2M3FN) +  llvm.return %res : i16 +} + +// ----- + +// Test invalid rounding modes for non-stochastic ops +llvm.func @convert_float_to_tf32_rs_not_supported(%src : f32) -> i32 { +  // expected-error @below {{Only {rn,rz,rna} rounding modes supported for ConvertFloatToTF32Op.}} +  %res = nvvm.convert.float.to.tf32 %src {rnd = #nvvm.fp_rnd_mode<rs>} +  llvm.return %res : i32 +} + +// ----- + +llvm.func @convert_f32x2_to_f8x2_rs_not_supported(%a : f32, %b : f32) { +  // expected-error @below {{Only RN rounding mode is supported for conversions from f32x2 to 'f8E4M3FN' and 'f8E5M2' types}} +  %res = nvvm.convert.f32x2.to.f8x2 %a, %b {rnd = #nvvm.fp_rnd_mode<rs>, sat = #nvvm.sat_mode<satfinite>} : i16 (f8E4M3FN) +  llvm.return +} + +// ----- + +llvm.func @convert_bf16x2_to_f8x2_rs_not_supported(%src : vector<2xbf16>) { +  // expected-error @below {{Only RZ and RP rounding modes are supported for conversions from bf16x2 to f8x2.}} +  %res = nvvm.convert.bf16x2.to.f8x2 %src {rnd = #nvvm.fp_rnd_mode<rs>} : vector<2xbf16> -> i16 (f8E8M0FNU) +  llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir index e703600..5e85759 100644 --- a/mlir/test/Dialect/LLVMIR/rocdl.mlir +++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir @@ -664,6 +664,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {    llvm.return  } +llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) { +  // CHECK-LABEL @rocdl.global.load.async.to.lds +  // CHECK: rocdl.global.load.async.to.lds.b8 %{{.*}}, %{{.*}}, 0, 0 +  // CHECK: rocdl.global.load.async.to.lds.b32 %{{.*}}, %{{.*}}, 0, 0 +  // CHECK: rocdl.global.load.async.to.lds.b64 %{{.*}}, %{{.*}}, 0, 0 +  // CHECK: rocdl.global.load.async.to.lds.b128 %{{.*}}, %{{.*}}, 0, 0 +  rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : <1>, <3> +  rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : <1>, <3> +  rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : <1>, <3> +  rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : <1>, <3> +  llvm.return +} +  // CHECK-LABEL @rocdl.tensor.load.to.lds  llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,                                      %dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) { diff --git a/mlir/test/Dialect/MemRef/ops.mlir b/mlir/test/Dialect/MemRef/ops.mlir index a90c950..b1db99b 100644 --- a/mlir/test/Dialect/MemRef/ops.mlir +++ b/mlir/test/Dialect/MemRef/ops.mlir @@ -440,7 +440,8 @@ func.func @expand_collapse_shape_dynamic(%arg0: memref<?x?x?xf32>,           %arg4: index,           %arg5: index,           %arg6: index, -         %arg7: memref<4x?x4xf32>) { +         %arg7: memref<4x?x4xf32>, +         %arg8: memref<1x1x18x?xsi8, strided<[?, ?, ?, 1], offset: ?>>) {  //       CHECK:   memref.collapse_shape {{.*}} {{\[}}[0, 1], [2]]  //  CHECK-SAME:     memref<?x?x?xf32> into memref<?x?xf32>    %0 = memref.collapse_shape %arg0 [[0, 1], [2]] : @@ -489,6 +490,10 @@ func.func @expand_collapse_shape_dynamic(%arg0: memref<?x?x?xf32>,  //       CHECK:   memref.expand_shape {{.*}} {{\[}}[0, 1], [2], [3, 4]]    %4 = memref.expand_shape %arg7 [[0, 1], [2], [3, 4]] output_shape [2, 2, %arg4, 2, 2]          : memref<4x?x4xf32> into memref<2x2x?x2x2xf32> + +//       CHECK:   memref.collapse_shape {{.*}} {{\[}}[0, 1], [2], [3]] +//  CHECK-SAME:     memref<1x1x18x?xsi8, strided<[?, ?, ?, 1], offset: ?>> into memref<1x18x?xsi8, strided<[?, ?, 1], offset: ?>> +  %5 = memref.collapse_shape %arg8 [[0, 1], [2], [3]] : memref<1x1x18x?xsi8, strided<[?, ?, ?, 1], offset: ?>> into memref<1x18x?xsi8, strided<[?, ?, 1], offset: ?>>    return  } diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir index 26b63fb..0e75894 100644 --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -492,6 +492,15 @@ func.func @fct1(%0 : !llvm.ptr) -> () {  // ----- +%i1 = arith.constant 1 : i32 +%i2 = arith.constant 10 : i32 +// expected-error@+1 {{unstructured acc.loop must not have induction variables}} +acc.loop control(%iv : i32) = (%i1 : i32) to (%i2 : i32) step (%i1 : i32) { +  acc.yield +} attributes {independent = [#acc.device_type<none>], unstructured} + +// ----- +  // expected-error@+1 {{expect at least one of num, dim or static values}}  acc.loop gang({}) {    "test.openacc_dummy_op"() : () -> () diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir index 042ee25..df8ab9b 100644 --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -2143,6 +2143,20 @@ func.func @acc_loop_container() {  // ----- +func.func @acc_unstructured_loop() { +  acc.loop { +    acc.yield +  } attributes {independent = [#acc.device_type<none>], unstructured} +  return +} + +// CHECK-LABEL: func.func @acc_unstructured_loop +// CHECK:       acc.loop +// CHECK:         acc.yield +// CHECK:       } attributes {independent = [#acc.device_type<none>], unstructured} + +// ----- +  // Test private recipe with data bounds for array slicing  acc.private.recipe @privatization_memref_slice : memref<10x10xf32> init {  ^bb0(%arg0: memref<10x10xf32>, %bounds0: !acc.data_bounds_ty, %bounds1: !acc.data_bounds_ty): diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir index ebbe3ce..92f3537 100644 --- a/mlir/test/Dialect/XeGPU/invalid.mlir +++ b/mlir/test/Dialect/XeGPU/invalid.mlir @@ -451,7 +451,7 @@ func.func @store_scatter_offset_wi_1(%src: memref<?xf16>) {    %offsets = arith.constant dense<[0]> : vector<1xindex>    %mask = arith.constant dense<1>: vector<1xi1>    // expected-error@+1 {{Mask should match value except the chunk size dim}} -  xegpu.store %val, %src[%offsets], %mask  +  xegpu.store %val, %src[%offsets], %mask          : vector<4xf16>, memref<?xf16>, vector<1xindex>, vector<1xi1>    return  } @@ -871,14 +871,6 @@ func.func @load_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>) {  }  // ----- -func.func @load_mem_desc_invalid_attr2(%arg0: !xegpu.mem_desc<16x64xf16>) { -  // expected-error@+1 {{subgroup_block_io are only allowed when result is a 1D VectorType.}} -  %data2 = xegpu.load_matrix %arg0[8, 8] <{subgroup_block_io}>: !xegpu.mem_desc<16x64xf16> -> vector<16x16xf16> -  return -} - - -// -----  func.func @store_mem_desc_mismatch_element_type(%arg0: !xegpu.mem_desc<16x64xf16>, %arg1: vector<16x16xf32>) {    // expected-error@+1 {{failed to verify that all of {mem_desc, data} have same element type}}    xegpu.store_matrix %arg1, %arg0[8, 8] : vector<16x16xf32>, !xegpu.mem_desc<16x64xf16> @@ -900,16 +892,25 @@ func.func @store_mem_desc_invalid_rank(%arg0: !xegpu.mem_desc<64xf16>, %arg1: ve  }  // ----- -func.func @store_mem_desc_invalid_attr2(%arg0: !xegpu.mem_desc<16x64xf16>, %data: vector<16x16xf16>) { -  // expected-error@+1 {{subgroup_block_io are only allowed when result is a 1D VectorType.}} -  xegpu.store_matrix %data,  %arg0[8, 8] <{subgroup_block_io}>: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16> +func.func @simt_store_matrix_vector_nonlinear(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1]>>, %arg1: vector<2x16xf32>) { +  // expected-error@+1 {{With subgroup_block_io, accessed data must be contiguous and coalesced}} +  xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [2, 1]>} : +        vector<2x16xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1]>>    return  }  // ----- -func.func @store_mem_desc_invalid_attr2(%arg0: !xegpu.mem_desc<16x64xf16>, %data: vector<16x16xf16>) { -  // expected-error@+1 {{subgroup_block_io are only allowed when result is a 1D VectorType.}} -  xegpu.store_matrix %data,  %arg0[8, 8] <{subgroup_block_io}>: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16> +func.func @simt_store_matrix_vector_noncoalesced(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [1, 16]>>, %arg1: vector<16x2xf32>) { +  // expected-error@+1 {{With subgroup_block_io, the distributed dimensions must be contiguous}} +  xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>} : +        vector<16x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [1, 16]>>    return  } +// ----- +func.func @simt_store_matrix_vector_noncoalesced(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1], block = [1, 17]>>, %arg1: vector<16x2xf32>) { +  // expected-error@+1 {{With subgroup_block_io, the block shape must match the lane layout}} +  xegpu.store_matrix %arg1, %arg0[0, 0] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>} : +        vector<16x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [32, 1], block = [1, 17]>> +  return +} diff --git a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir index 27a3dc3..8946d14 100644 --- a/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir +++ b/mlir/test/Dialect/XeGPU/subgroup-distribute.mlir @@ -265,3 +265,66 @@ gpu.module @xevm_module{      gpu.return    }  } + +// ----- +// CHECK-LABEL: gpu.func @load_store_matrix_1({{.*}}) { +// CHECK: %[[LAYOUT_X:.*]] = arith.constant 8 : index +// CHECK: %[[LAYOUT_Y:.*]] = arith.constant 2 : index +// CHECK: %[[LANE_ID:.*]] = gpu.lane_id +// CHECK: %[[DELINEARIZED_LANE_Y:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]] +// CHECK: %[[DELINEARIZED_LANE_X:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]] +// CHECK: %[[LANE_Y_OFFSET:.*]] = index.remu %[[DELINEARIZED_LANE_Y]], %[[LAYOUT_Y]] +// CHECK: %[[LANE_X_OFFSET:.*]] = index.remu %[[DELINEARIZED_LANE_X]], %[[LAYOUT_X]] +// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<1x1xf32> +// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : vector<1x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index +gpu.module @xevm_module{ +  gpu.func @load_store_matrix_1(%arg0: !xegpu.mem_desc<32x32xf32>) { +    %c0 = arith.constant 0 : index +    %1 = xegpu.load_matrix %arg0[%c0, %c0] <{layout = #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>}> : !xegpu.mem_desc<32x32xf32>, index, index -> vector<2x8xf32> +    xegpu.store_matrix %1, %arg0[%c0, %c0] <{layout = #xegpu.layout<lane_layout = [2, 8], lane_data = [1, 1]>}> : vector<2x8xf32>, !xegpu.mem_desc<32x32xf32>, index, index +    gpu.return +  } +} + +// ----- +// CHECK-LABEL: gpu.func @load_store_matrix_2({{.*}}) { +// CHECK: %[[DIST_UNIT_HEIGHT_X:.*]] = arith.constant 4 : index +// CHECK: %[[DIST_UNIT_HEIGHT_Y:.*]] = arith.constant 8 : index +// CHECK: %[[LANE_DATA_Y:.*]] = arith.constant 2 : index +// CHECK: %[[USER_OFFSET_X:.*]] = arith.constant 1 : index +// CHECK: %[[LANE_ID:.*]] = gpu.lane_id +// CHECK: %[[DELINEARIZED_LANE_Y:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]] +// CHECK: %[[DELINEARIZED_LANE_X:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]] +// CHECK: %[[LANE_Y_OFFSET_1:.*]] = index.mul %[[DELINEARIZED_LANE_Y]], %[[LANE_DATA_Y]] +// CHECK: %[[LANE_Y_OFFSET:.*]] = index.remu %[[LANE_Y_OFFSET_1]], %[[DIST_UNIT_HEIGHT_Y]] +// CHECK: %[[LANE_X_OFFSET_1:.*]] = index.remu %[[DELINEARIZED_LANE_X]], %[[DIST_UNIT_HEIGHT_X]] +// CHECK: %[[LANE_X_OFFSET:.*]] = index.add %[[LANE_X_OFFSET_1]], %[[USER_OFFSET_X]] +// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<2x1xf32> +// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : vector<2x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index +gpu.module @xevm_module{ +  gpu.func @load_store_matrix_2(%arg0: !xegpu.mem_desc<32x32xf32>) { +    %c0 = arith.constant 0 : index +    %c1 = arith.constant 1 : index +    %1 = xegpu.load_matrix %arg0[%c0, %c1] <{layout = #xegpu.layout<lane_layout = [4, 4], lane_data = [2, 1]>}> : !xegpu.mem_desc<32x32xf32>, index, index -> vector<8x4xf32> +    xegpu.store_matrix %1, %arg0[%c0, %c1] <{layout = #xegpu.layout<lane_layout = [4, 4], lane_data = [2, 1]>}> : vector<8x4xf32>, !xegpu.mem_desc<32x32xf32>, index, index +    gpu.return +  } +} + +// ----- +// CHECK-LABEL: gpu.func @load_store_matrix_3({{.*}}) { +// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%{{.*}}, %{{.*}}] <{subgroup_block_io}>: +// CHECK-SAME: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<block = [16, 1], stride = [1, 32]>>, index, index -> vector<1x2xf32> +// CHECK: xegpu.store_matrix %[[MAT]], %arg0[%{{.*}}, %{{.*}}] <{subgroup_block_io}>: +// CHECK-SAME: vector<1x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<block = [16, 1], stride = [1, 32]>>, index, index +gpu.module @xevm_module{ +  gpu.func @load_store_matrix_3(%arg0: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [16, 1]>>) { +    %c0 = arith.constant 0 : index +    %c1 = arith.constant 1 : index +    %1 = xegpu.load_matrix %arg0[%c0, %c1] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : +      !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [16, 1]>>, index, index -> vector<16x2xf32> +    xegpu.store_matrix %1, %arg0[%c0, %c1] {subgroup_block_io, layout = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>} : +      vector<16x2xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<stride = [1, 32], block = [16, 1]>>, index, index +    gpu.return +  } +} diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index 60bd24a..1e4cf8d 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -1308,7 +1308,7 @@ llvm.func @experimental_constrained_fpext(%s: f32, %v: vector<4xf32>) {  // CHECK-DAG: declare float @llvm.cos.f32(float)  // CHECK-DAG: declare <8 x float> @llvm.cos.v8f32(<8 x float>) #0  // CHECK-DAG: declare { float, float } @llvm.sincos.f32(float) -// CHECK-DAG: declare { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float>) #0 +// CHECK-DAG: declare { <8 x float>, <8 x float> } @llvm.sincos.v8f32(<8 x float>)  // CHECK-DAG: declare float @llvm.copysign.f32(float, float)  // CHECK-DAG: declare float @llvm.rint.f32(float)  // CHECK-DAG: declare double @llvm.rint.f64(double) diff --git a/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir b/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir new file mode 100644 index 0000000..b5bb223 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/convert_stochastic_rounding.mlir @@ -0,0 +1,182 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// ----- + +// Test valid architectures work + +// Valid case on sm_100a +gpu.module @valid_f16x2_rs_sm_100a [#nvvm.target<chip = "sm_100a">] { +  func.func @convert_rs() { +    %f1 = llvm.mlir.constant(1.0 : f32) : f32 +    %f2 = llvm.mlir.constant(2.0 : f32) : f32 +    %rbits = llvm.mlir.constant(0x12345678 : i32) : i32 +    %res = nvvm.convert.f32x2.to.f16x2 %f1, %f2, %rbits : vector<2xf16> +    return +  } +} + +// Valid case on sm_103a +gpu.module @valid_bf16x2_rs_sm_103a [#nvvm.target<chip = "sm_103a">] { +  func.func @convert_rs() { +    %f1 = llvm.mlir.constant(1.0 : f32) : f32 +    %f2 = llvm.mlir.constant(2.0 : f32) : f32 +    %rbits = llvm.mlir.constant(0 : i32) : i32 +    %res = nvvm.convert.f32x2.to.bf16x2 %f1, %f2, %rbits : vector<2xbf16> +    return +  } +} + +// ----- + +// Test F32x2 -> F16x2 with stochastic rounding (.rs) + +// CHECK-LABEL: @convert_f32x2_to_f16x2_rs +llvm.func @convert_f32x2_to_f16x2_rs(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> { +  // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB,  %rbits : vector<2xf16> +  llvm.return %res : vector<2xf16> +} + +// CHECK-LABEL: @convert_f32x2_to_f16x2_rs_satfinite +llvm.func @convert_f32x2_to_f16x2_rs_satfinite(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> { +  // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {sat = #nvvm.sat_mode<satfinite>} : vector<2xf16> +  llvm.return %res : vector<2xf16> +} + +// CHECK-LABEL: @convert_f32x2_to_f16x2_rs_relu +llvm.func @convert_f32x2_to_f16x2_rs_relu(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> { +  // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {relu = true} : vector<2xf16> +  llvm.return %res : vector<2xf16> +} + +// CHECK-LABEL: @convert_f32x2_to_f16x2_rs_relu_satfinite +llvm.func @convert_f32x2_to_f16x2_rs_relu_satfinite(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xf16> { +  // CHECK: %{{.*}} = call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x2.to.f16x2 %srcA, %srcB, %rbits {relu = true, sat = #nvvm.sat_mode<satfinite>} : vector<2xf16> +  llvm.return %res : vector<2xf16> +} + +// ----- + +// Test F32x2 -> BF16x2 with stochastic rounding (.rs) + +// CHECK-LABEL: @convert_f32x2_to_bf16x2_rs +llvm.func @convert_f32x2_to_bf16x2_rs(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> { +  // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB,  %rbits : vector<2xbf16> +  llvm.return %res : vector<2xbf16> +} + +// CHECK-LABEL: @convert_f32x2_to_bf16x2_rs_satfinite +llvm.func @convert_f32x2_to_bf16x2_rs_satfinite(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> { +  // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16> +  llvm.return %res : vector<2xbf16> +} + +// CHECK-LABEL: @convert_f32x2_to_bf16x2_rs_relu +llvm.func @convert_f32x2_to_bf16x2_rs_relu(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> { +  // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {relu = true} : vector<2xbf16> +  llvm.return %res : vector<2xbf16> +} + +// CHECK-LABEL: @convert_f32x2_to_bf16x2_rs_relu_satfinite +llvm.func @convert_f32x2_to_bf16x2_rs_relu_satfinite(%srcA : f32, %srcB : f32, %rbits : i32) -> vector<2xbf16> { +  // CHECK: %{{.*}} = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu.satfinite(float %{{.*}}, float %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x2.to.bf16x2 %srcA, %srcB, %rbits {relu = true, sat = #nvvm.sat_mode<satfinite>} : vector<2xbf16> +  llvm.return %res : vector<2xbf16> +} + +// ----- + +// Test F32x4 -> F8x4 (E4M3) with stochastic rounding (.rs) + +// CHECK-LABEL: @convert_f32x4_to_f8x4_e4m3_rs +llvm.func @convert_f32x4_to_f8x4_e4m3_rs(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E4M3FN) +  llvm.return %res : vector<4xi8> +} + +// CHECK-LABEL: @convert_f32x4_to_f8x4_e4m3_rs_relu +llvm.func @convert_f32x4_to_f8x4_e4m3_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f8E4M3FN) +  llvm.return %res : vector<4xi8> +} + +// ----- + +// Test F32x4 -> F8x4 (E5M2) with stochastic rounding (.rs) + +// CHECK-LABEL: @convert_f32x4_to_f8x4_e5m2_rs +llvm.func @convert_f32x4_to_f8x4_e5m2_rs(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f8E5M2) +  llvm.return %res : vector<4xi8> +} + +// CHECK-LABEL: @convert_f32x4_to_f8x4_e5m2_rs_relu +llvm.func @convert_f32x4_to_f8x4_e5m2_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f8x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f8E5M2) +  llvm.return %res : vector<4xi8> +} + +// ----- + +// Test F32x4 -> F6x4 (E2M3) with stochastic rounding (.rs) + +// CHECK-LABEL: @convert_f32x4_to_f6x4_e2m3_rs +llvm.func @convert_f32x4_to_f6x4_e2m3_rs(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f6E2M3FN) +  llvm.return %res : vector<4xi8> +} + +// CHECK-LABEL: @convert_f32x4_to_f6x4_e2m3_rs_relu +llvm.func @convert_f32x4_to_f6x4_e2m3_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f6E2M3FN) +  llvm.return %res : vector<4xi8> +} + +// ----- + +// Test F32x4 -> F6x4 (E3M2) with stochastic rounding (.rs) + +// CHECK-LABEL: @convert_f32x4_to_f6x4_e3m2_rs +llvm.func @convert_f32x4_to_f6x4_e3m2_rs(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits : vector<4xf32> -> vector<4xi8> (f6E3M2FN) +  llvm.return %res : vector<4xi8> +} + +// CHECK-LABEL: @convert_f32x4_to_f6x4_e3m2_rs_relu +llvm.func @convert_f32x4_to_f6x4_e3m2_rs_relu(%src : vector<4xf32>, %rbits : i32) -> vector<4xi8> { +  // CHECK: %{{.*}} = call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f6x4 %src, %rbits {relu = true} : vector<4xf32> -> vector<4xi8> (f6E3M2FN) +  llvm.return %res : vector<4xi8> +} + +// ----- + +// Test F32x4 -> F4x4 (E2M1) with stochastic rounding (.rs) + +// CHECK-LABEL: @convert_f32x4_to_f4x4_e2m1_rs +llvm.func @convert_f32x4_to_f4x4_e2m1_rs(%src : vector<4xf32>, %rbits : i32) -> i16 { +  // CHECK: %{{.*}} = call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f4x4 %src, %rbits : vector<4xf32> -> i16 (f4E2M1FN) +  llvm.return %res : i16 +} + +// CHECK-LABEL: @convert_f32x4_to_f4x4_e2m1_rs_relu +llvm.func @convert_f32x4_to_f4x4_e2m1_rs_relu(%src : vector<4xf32>, %rbits : i32) -> i16 { +  // CHECK: %{{.*}} = call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.relu.satfinite(<4 x float> %{{.*}}, i32 %{{.*}}) +  %res = nvvm.convert.f32x4.to.f4x4 %src, %rbits {relu = true} : vector<4xf32> -> i16 (f4E2M1FN) +  llvm.return %res : i16 +} + diff --git a/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir new file mode 100644 index 0000000..a8a7430 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/redux-sync-invalid.mlir @@ -0,0 +1,49 @@ +// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s + +// ----- + +llvm.func @redux_sync_i32_with_abs(%value: i32, %offset: i32) { +  // expected-error@+1 {{abs attribute is supported only for f32 type}} +  %res = nvvm.redux.sync add %value, %offset {abs = true}: i32 -> i32 +  llvm.return +} + +// ----- + +llvm.func @redux_sync_i32_with_nan(%value: i32, %offset: i32) { +  // expected-error@+1 {{nan attribute is supported only for f32 type}} +  %res = nvvm.redux.sync add %value, %offset {nan = true}: i32 -> i32 +  llvm.return +} + +// ----- + +llvm.func @redux_sync_f32_with_invalid_kind_add(%value: f32, %offset: i32) { +  // expected-error@+1 {{'add' redux kind unsupported with 'f32' type. Only supported type is 'i32'.}} +  %res = nvvm.redux.sync add %value, %offset: f32 -> f32 +  llvm.return +} + +// ----- + +llvm.func @redux_sync_f32_with_invalid_kind_and(%value: f32, %offset: i32) { +  // expected-error@+1 {{'and' redux kind unsupported with 'f32' type. Only supported type is 'i32'.}} +  %res = nvvm.redux.sync and %value, %offset: f32 -> f32 +  llvm.return +} + +// ----- + +llvm.func @redux_sync_i32_with_invalid_kind_fmin(%value: i32, %offset: i32) { +  // expected-error@+1 {{'fmin' redux kind unsupported with 'i32' type. Only supported type is 'f32'.}} +  %res = nvvm.redux.sync fmin %value, %offset: i32 -> i32 +  llvm.return +} + +// ----- + +llvm.func @redux_sync_non_matching_types(%value: i32, %offset: i32) { +  // expected-error@+1 {{failed to verify that all of {res, val} have same type}} +  %res = nvvm.redux.sync add %value, %offset: i32 -> f32 +  llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir new file mode 100644 index 0000000..f2ccfe7 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/shfl-sync-invalid.mlir @@ -0,0 +1,22 @@ +// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s + +// ----- + +func.func @nvvm_invalid_shfl_pred(%arg0 : i32, %arg1 : f32, %arg2 : i32, %arg3 : i32) { +  // expected-error@+1 {{"return_value_and_is_valid" attribute must be specified when the return type is a struct type}} +  %0 = nvvm.shfl.sync bfly %arg0, %arg1, %arg2, %arg3 : f32 -> !llvm.struct<(f32, i1)> +} + +// ----- + +func.func @nvvm_invalid_shfl_invalid_return_type_1(%arg0 : i32, %arg1 : f32, %arg2 : i32, %arg3 : i32) { +  // expected-error@+1 {{expected return type to be of type 'f32' but got 'i32' instead}} +  %0 = nvvm.shfl.sync bfly %arg0, %arg1, %arg2, %arg3 : f32 -> i32 +} + +// ----- + +func.func @nvvm_invalid_shfl_invalid_return_type_2(%arg0 : i32, %arg1 : f32, %arg2 : i32, %arg3 : i32) { +  // expected-error@+1 {{expected first element in the returned struct to be of type 'f32' but got 'i32' instead}} +  %0 = nvvm.shfl.sync bfly %arg0, %arg1, %arg2, %arg3 {return_value_and_is_valid} : f32 -> !llvm.struct<(i32, i1)> +} diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir new file mode 100644 index 0000000..1b93f20c --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-ld-invalid.mlir @@ -0,0 +1,9 @@ +// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s + +// ----- + +llvm.func @nvvm_tcgen05_ld_32x32b_offset(%tmemAddr : !llvm.ptr<6>, %offset : i64) -> () { +  // expected-error@+1 {{offset argument is only supported for shape 16x32bx2}} +  %ldv2 = nvvm.tcgen05.ld %tmemAddr, %offset { pack, shape = #nvvm.tcgen05_ldst_shape<shape_32x32b>} : vector<2 x i32> +  llvm.return +} diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir index 8a848221..3fbd9e0 100644 --- a/mlir/test/Target/LLVMIR/rocdl.mlir +++ b/mlir/test/Target/LLVMIR/rocdl.mlir @@ -1040,6 +1040,19 @@ llvm.func @rocdl.global.load.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) {    llvm.return  } +// CHECK-LABEL: rocdl.global.load.async.to.lds +llvm.func @rocdl.global.load.async.to.lds(%src : !llvm.ptr<1>, %dst: !llvm.ptr<3>) { +  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b8 +  rocdl.global.load.async.to.lds.b8 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3> +  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b32 +  rocdl.global.load.async.to.lds.b32 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3> +  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b64 +  rocdl.global.load.async.to.lds.b64 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3> +  // CHECK: call void @llvm.amdgcn.global.load.async.to.lds.b128 +  rocdl.global.load.async.to.lds.b128 %src, %dst, 0, 0 : !llvm.ptr<1>, !llvm.ptr<3> +  llvm.return +} +  // CHECK-LABEL: rocdl.tensor.load.to.lds  llvm.func @rocdl.tensor.load.to.lds(%dgroup0 : vector<4xi32>, %dgroup1 : vector<8xi32>,                                      %dgroup2 : vector<4xi32>, %dgroup3 : vector<4xi32>) { diff --git a/mlir/test/Target/SPIRV/group-ops.mlir b/mlir/test/Target/SPIRV/group-ops.mlir index cf519cb..6f19b35 100644 --- a/mlir/test/Target/SPIRV/group-ops.mlir +++ b/mlir/test/Target/SPIRV/group-ops.mlir @@ -1,11 +1,13 @@ -// RUN: mlir-translate -no-implicit-module -test-spirv-roundtrip -split-input-file %s | FileCheck %s +// RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip --split-input-file %s | FileCheck %s  // RUN: %if spirv-tools %{ rm -rf %t %}  // RUN: %if spirv-tools %{ mkdir %t %}  // RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv --split-input-file --spirv-save-validation-files-with-prefix=%t/module %s %}  // RUN: %if spirv-tools %{ spirv-val %t %} -spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, Linkage, SubgroupBallotKHR, Groups, SubgroupBufferBlockIOINTEL, GroupNonUniformArithmetic, GroupUniformArithmeticKHR], [SPV_KHR_storage_buffer_storage_class, SPV_KHR_shader_ballot, SPV_INTEL_subgroups, SPV_KHR_uniform_group_instructions]> { +spirv.module Logical GLSL450 requires #spirv.vce<v1.3, +  [Shader, Linkage, SubgroupBallotKHR, Groups, GroupNonUniformArithmetic, GroupUniformArithmeticKHR], +  [SPV_KHR_storage_buffer_storage_class, SPV_KHR_shader_ballot, SPV_KHR_uniform_group_instructions]> {    // CHECK-LABEL: @subgroup_ballot    spirv.func @subgroup_ballot(%predicate: i1) -> vector<4xi32> "None" {      // CHECK: %{{.*}} = spirv.KHR.SubgroupBallot %{{.*}}: vector<4xi32> @@ -24,30 +26,6 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.3, [Shader, Linkage, Subgrou      %0 = spirv.GroupBroadcast <Workgroup> %value, %localid : f32, vector<3xi32>      spirv.ReturnValue %0: f32    } -  // CHECK-LABEL: @subgroup_block_read_intel -  spirv.func @subgroup_block_read_intel(%ptr : !spirv.ptr<i32, StorageBuffer>) -> i32 "None" { -    // CHECK: spirv.INTEL.SubgroupBlockRead %{{.*}} : !spirv.ptr<i32, StorageBuffer> -> i32 -    %0 = spirv.INTEL.SubgroupBlockRead %ptr : !spirv.ptr<i32, StorageBuffer> -> i32 -    spirv.ReturnValue %0: i32 -  } -  // CHECK-LABEL: @subgroup_block_read_intel_vector -  spirv.func @subgroup_block_read_intel_vector(%ptr : !spirv.ptr<i32, StorageBuffer>) -> vector<3xi32> "None" { -    // CHECK: spirv.INTEL.SubgroupBlockRead %{{.*}} : !spirv.ptr<i32, StorageBuffer> -> vector<3xi32> -    %0 = spirv.INTEL.SubgroupBlockRead %ptr : !spirv.ptr<i32, StorageBuffer> -> vector<3xi32> -    spirv.ReturnValue %0: vector<3xi32> -  } -  // CHECK-LABEL: @subgroup_block_write_intel -  spirv.func @subgroup_block_write_intel(%ptr : !spirv.ptr<i32, StorageBuffer>, %value: i32) -> () "None" { -    // CHECK: spirv.INTEL.SubgroupBlockWrite %{{.*}}, %{{.*}} : i32 -    spirv.INTEL.SubgroupBlockWrite "StorageBuffer" %ptr, %value : i32 -    spirv.Return -  } -  // CHECK-LABEL: @subgroup_block_write_intel_vector -  spirv.func @subgroup_block_write_intel_vector(%ptr : !spirv.ptr<i32, StorageBuffer>, %value: vector<3xi32>) -> () "None" { -    // CHECK: spirv.INTEL.SubgroupBlockWrite %{{.*}}, %{{.*}} : vector<3xi32> -    spirv.INTEL.SubgroupBlockWrite "StorageBuffer" %ptr, %value : vector<3xi32> -    spirv.Return -  }    // CHECK-LABEL: @group_iadd    spirv.func @group_iadd(%value: i32) -> i32 "None" {      // CHECK: spirv.GroupIAdd <Workgroup> <Reduce> %{{.*}} : i32 diff --git a/mlir/test/Target/SPIRV/subgroup-block-intel.mlir b/mlir/test/Target/SPIRV/subgroup-block-intel.mlir new file mode 100644 index 0000000..14060e6 --- /dev/null +++ b/mlir/test/Target/SPIRV/subgroup-block-intel.mlir @@ -0,0 +1,34 @@ +// RUN: mlir-translate --no-implicit-module --test-spirv-roundtrip %s | FileCheck %s + +// RUN: %if spirv-tools %{ rm -rf %t %} +// RUN: %if spirv-tools %{ mkdir %t %} +// RUN: %if spirv-tools %{ mlir-translate --no-implicit-module --serialize-spirv --spirv-save-validation-files-with-prefix=%t/module %s %} +// RUN: %if spirv-tools %{ spirv-val %t %} + +spirv.module Physical64 GLSL450 requires #spirv.vce<v1.3, [Addresses, Shader, Linkage, SubgroupBufferBlockIOINTEL], +                                                          [SPV_KHR_storage_buffer_storage_class, SPV_INTEL_subgroups]> { +  // CHECK-LABEL: @subgroup_block_read_intel +  spirv.func @subgroup_block_read_intel(%ptr : !spirv.ptr<i32, StorageBuffer>) -> i32 "None" { +    // CHECK: spirv.INTEL.SubgroupBlockRead %{{.*}} : !spirv.ptr<i32, StorageBuffer> -> i32 +    %0 = spirv.INTEL.SubgroupBlockRead %ptr : !spirv.ptr<i32, StorageBuffer> -> i32 +    spirv.ReturnValue %0: i32 +  } +  // CHECK-LABEL: @subgroup_block_read_intel_vector +  spirv.func @subgroup_block_read_intel_vector(%ptr : !spirv.ptr<i32, StorageBuffer>) -> vector<3xi32> "None" { +    // CHECK: spirv.INTEL.SubgroupBlockRead %{{.*}} : !spirv.ptr<i32, StorageBuffer> -> vector<3xi32> +    %0 = spirv.INTEL.SubgroupBlockRead %ptr : !spirv.ptr<i32, StorageBuffer> -> vector<3xi32> +    spirv.ReturnValue %0: vector<3xi32> +  } +  // CHECK-LABEL: @subgroup_block_write_intel +  spirv.func @subgroup_block_write_intel(%ptr : !spirv.ptr<i32, StorageBuffer>, %value: i32) -> () "None" { +    // CHECK: spirv.INTEL.SubgroupBlockWrite %{{.*}}, %{{.*}} : i32 +    spirv.INTEL.SubgroupBlockWrite "StorageBuffer" %ptr, %value : i32 +    spirv.Return +  } +  // CHECK-LABEL: @subgroup_block_write_intel_vector +  spirv.func @subgroup_block_write_intel_vector(%ptr : !spirv.ptr<i32, StorageBuffer>, %value: vector<3xi32>) -> () "None" { +    // CHECK: spirv.INTEL.SubgroupBlockWrite %{{.*}}, %{{.*}} : vector<3xi32> +    spirv.INTEL.SubgroupBlockWrite "StorageBuffer" %ptr, %value : vector<3xi32> +    spirv.Return +  } +} diff --git a/mlir/test/Transforms/test-legalizer-no-materializations.mlir b/mlir/test/Transforms/test-legalizer-no-materializations.mlir new file mode 100644 index 0000000..82dd742 --- /dev/null +++ b/mlir/test/Transforms/test-legalizer-no-materializations.mlir @@ -0,0 +1,67 @@ +// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=0 build-materializations=0 attach-debug-materialization-kind=1" -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-KIND + +// CHECK-LABEL: func @dropped_input_in_use +// CHECK-KIND-LABEL: func @dropped_input_in_use +func.func @dropped_input_in_use(%arg: i16, %arg2: i64) { +  // CHECK-NEXT: %[[cast:.*]] = "test.cast"() : () -> i16 +  // CHECK-NEXT: "work"(%[[cast]]) : (i16) +  // CHECK-KIND-NEXT: %[[cast:.*]] = builtin.unrealized_conversion_cast to i16 {__kind__ = "source"} +  // CHECK-KIND-NEXT: "work"(%[[cast]]) : (i16) +  // expected-remark@+1 {{op 'work' is not legalizable}} +  "work"(%arg) : (i16) -> () +} + +// ----- + +// CHECK-KIND-LABEL: func @test_lookup_without_converter +//       CHECK-KIND:   %[[producer:.*]] = "test.valid_producer"() : () -> i16 +//       CHECK-KIND:   %[[cast:.*]] = builtin.unrealized_conversion_cast %[[producer]] : i16 to f64 {__kind__ = "target"} +//       CHECK-KIND:   "test.valid_consumer"(%[[cast]]) : (f64) -> () +//       CHECK-KIND:   "test.valid_consumer"(%[[producer]]) : (i16) -> () +func.func @test_lookup_without_converter() { +  %0 = "test.replace_with_valid_producer"() {type = i16} : () -> (i64) +  "test.replace_with_valid_consumer"(%0) {with_converter} : (i64) -> () +  // Make sure that the second "replace_with_valid_consumer" lowering does not +  // lookup the materialization that was created for the above op. +  "test.replace_with_valid_consumer"(%0) : (i64) -> () +  // expected-remark@+1 {{op 'func.return' is not legalizable}} +  return +} + +// ----- + +// CHECK-LABEL: func @remap_moved_region_args +func.func @remap_moved_region_args() { +  // CHECK-NEXT: return +  // CHECK-NEXT: ^bb1(%[[arg0:.*]]: i64, %[[arg1:.*]]: i16, %[[arg2:.*]]: i64, %[[arg3:.*]]: f32): +  // CHECK-NEXT: %[[cast1:.*]]:2 = builtin.unrealized_conversion_cast %[[arg3]] : f32 to f16, f16 +  // CHECK-NEXT: %[[cast2:.*]] = builtin.unrealized_conversion_cast %[[arg2]] : i64 to f64 +  // CHECK-NEXT: %[[cast3:.*]] = builtin.unrealized_conversion_cast %[[arg0]] : i64 to f64 +  // CHECK-NEXT: %[[cast4:.*]] = "test.cast"(%[[cast1]]#0, %[[cast1]]#1) : (f16, f16) -> f32 +  // CHECK-NEXT: "test.valid"(%[[cast3]], %[[cast2]], %[[cast4]]) : (f64, f64, f32) +  "test.region"() ({ +    ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32): +      "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> () +  }) : () -> () +  // expected-remark@+1 {{op 'func.return' is not legalizable}} +  return +} + +// ----- + +// CHECK-LABEL: func @remap_cloned_region_args +func.func @remap_cloned_region_args() { +  // CHECK-NEXT: return +  // CHECK-NEXT: ^bb1(%[[arg0:.*]]: i64, %[[arg1:.*]]: i16, %[[arg2:.*]]: i64, %[[arg3:.*]]: f32): +  // CHECK-NEXT: %[[cast1:.*]]:2 = builtin.unrealized_conversion_cast %[[arg3]] : f32 to f16, f16 +  // CHECK-NEXT: %[[cast2:.*]] = builtin.unrealized_conversion_cast %[[arg2]] : i64 to f64 +  // CHECK-NEXT: %[[cast3:.*]] = builtin.unrealized_conversion_cast %[[arg0]] : i64 to f64 +  // CHECK-NEXT: %[[cast4:.*]] = "test.cast"(%[[cast1]]#0, %[[cast1]]#1) : (f16, f16) -> f32 +  // CHECK-NEXT: "test.valid"(%[[cast3]], %[[cast2]], %[[cast4]]) : (f64, f64, f32) +  "test.region"() ({ +    ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32): +      "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> () +  }) {legalizer.should_clone} : () -> () +  // expected-remark@+1 {{op 'func.return' is not legalizable}} +  return +} diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index 94c5bb4..7c43bb7 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -1,7 +1,6 @@  // RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=1" -verify-diagnostics %s | FileCheck %s  // RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=1" -verify-diagnostics -profile-actions-to=- %s | FileCheck %s --check-prefix=CHECK-PROFILER  // RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=0" -verify-diagnostics %s | FileCheck %s -// RUN: mlir-opt -allow-unregistered-dialect -split-input-file -test-legalize-patterns="allow-pattern-rollback=0 build-materializations=0 attach-debug-materialization-kind=1" -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-KIND  // CHECK-PROFILER: "name": "pass-execution", "cat": "PERF", "ph": "B"  // CHECK-PROFILER: "name": "apply-conversion", "cat": "PERF", "ph": "B" @@ -146,36 +145,6 @@ func.func @no_remap_nested() {  // ----- -// CHECK-LABEL: func @remap_moved_region_args -func.func @remap_moved_region_args() { -  // CHECK-NEXT: return -  // CHECK-NEXT: ^bb1(%{{.*}}: f64, %{{.*}}: f64, %{{.*}}: f16, %{{.*}}: f16): -  // CHECK-NEXT: "test.cast"{{.*}} : (f16, f16) -> f32 -  // CHECK-NEXT: "test.valid"{{.*}} : (f64, f64, f32) -  "test.region"() ({ -    ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32): -      "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> () -  }) : () -> () -  // expected-remark@+1 {{op 'func.return' is not legalizable}} -  return -} - -// ----- - -// CHECK-LABEL: func @remap_cloned_region_args -func.func @remap_cloned_region_args() { -  // CHECK-NEXT: return -  // CHECK-NEXT: ^bb1(%{{.*}}: f64, %{{.*}}: f64, %{{.*}}: f16, %{{.*}}: f16): -  // CHECK-NEXT: "test.cast"{{.*}} : (f16, f16) -> f32 -  // CHECK-NEXT: "test.valid"{{.*}} : (f64, f64, f32) -  "test.region"() ({ -    ^bb1(%i0: i64, %unused: i16, %i1: i64, %2: f32): -      "test.invalid"(%i0, %i1, %2) : (i64, i64, f32) -> () -  }) {legalizer.should_clone} : () -> () -  // expected-remark@+1 {{op 'func.return' is not legalizable}} -  return -} -  // CHECK-LABEL: func @remap_drop_region  func.func @remap_drop_region() {    // CHECK-NEXT: return @@ -191,12 +160,9 @@ func.func @remap_drop_region() {  // -----  // CHECK-LABEL: func @dropped_input_in_use -// CHECK-KIND-LABEL: func @dropped_input_in_use  func.func @dropped_input_in_use(%arg: i16, %arg2: i64) {    // CHECK-NEXT: %[[cast:.*]] = "test.cast"() : () -> i16    // CHECK-NEXT: "work"(%[[cast]]) : (i16) -  // CHECK-KIND-NEXT: %[[cast:.*]] = builtin.unrealized_conversion_cast to i16 {__kind__ = "source"} -  // CHECK-KIND-NEXT: "work"(%[[cast]]) : (i16)    // expected-remark@+1 {{op 'work' is not legalizable}}    "work"(%arg) : (i16) -> ()  } @@ -452,11 +418,6 @@ func.func @test_multiple_1_to_n_replacement() {  //       CHECK:   %[[cast:.*]] = "test.cast"(%[[producer]]) : (i16) -> f64  //       CHECK:   "test.valid_consumer"(%[[cast]]) : (f64) -> ()  //       CHECK:   "test.valid_consumer"(%[[producer]]) : (i16) -> () -// CHECK-KIND-LABEL: func @test_lookup_without_converter -//       CHECK-KIND:   %[[producer:.*]] = "test.valid_producer"() : () -> i16 -//       CHECK-KIND:   %[[cast:.*]] = builtin.unrealized_conversion_cast %[[producer]] : i16 to f64 {__kind__ = "target"} -//       CHECK-KIND:   "test.valid_consumer"(%[[cast]]) : (f64) -> () -//       CHECK-KIND:   "test.valid_consumer"(%[[producer]]) : (i16) -> ()  func.func @test_lookup_without_converter() {    %0 = "test.replace_with_valid_producer"() {type = i16} : () -> (i64)    "test.replace_with_valid_consumer"(%0) {with_converter} : (i64) -> () diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index fd2b943..12edecc 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -1553,8 +1553,7 @@ struct TestLegalizePatternDriver                             [](Type type) { return type.isF32(); });      });      target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp op) { -      return converter.isSignatureLegal(op.getFunctionType()) && -             converter.isLegal(&op.getBody()); +      return converter.isSignatureLegal(op.getFunctionType());      });      target.addDynamicallyLegalOp<func::CallOp>(          [&](func::CallOp op) { return converter.isLegal(op); }); @@ -2156,8 +2155,7 @@ struct TestTypeConversionDriver                recursiveType.getName() == "outer_converted_type");      });      target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp op) { -      return converter.isSignatureLegal(op.getFunctionType()) && -             converter.isLegal(&op.getBody()); +      return converter.isSignatureLegal(op.getFunctionType());      });      target.addDynamicallyLegalOp<TestCastOp>([&](TestCastOp op) {        // Allow casts from F64 to F32. diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp index 76d4611..93d5144 100644 --- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp +++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp @@ -200,7 +200,8 @@ class TestStepOpPattern : public OpConversionPattern<vector::StepOp> {      Value sgId =          gpu::SubgroupIdOp::create(rewriter, loc, /*upper_bound=*/nullptr); -    auto maybeOffsets = sliceAttr.getOffsets(rewriter, loc, sgId, wgShape); +    auto maybeOffsets = +        sliceAttr.computeDistributedCoords(rewriter, loc, sgId, wgShape);      if (failed(maybeOffsets))        return failure(); diff --git a/mlir/test/python/CMakeLists.txt b/mlir/test/python/CMakeLists.txt index 2c12381..c81f75f 100644 --- a/mlir/test/python/CMakeLists.txt +++ b/mlir/test/python/CMakeLists.txt @@ -11,7 +11,7 @@ add_public_tablegen_target(MLIRPythonTestIncGen)  add_subdirectory(lib) -set(MLIR_PYTHON_TEST_DEPENDS MLIRPythonModules mlir-runner) +set(MLIR_PYTHON_TEST_DEPENDS MLIRPythonModules mlir-runner mlir_c_runner_utils mlir_runner_utils)  if(NOT MLIR_STANDALONE_BUILD)    list(APPEND MLIR_PYTHON_TEST_DEPENDS FileCheck count not)  endif()  | 
