author | Nicolas Vasilache <ntv@google.com> | 2020-06-05 13:41:05 -0400
---|---|---
committer | Nicolas Vasilache <ntv@google.com> | 2020-06-05 13:47:29 -0400
commit | eb7db879af4f550da820e092e7b791243a3166f8 |
tree | bd3b2a0602f2770afab3bd32480c5dbe9fd7fc2b |
parent | a018b538a6177b7fb50bad067ca4e828a85dbf76 |
[mlir][test][CPU] Reduce the size of mlir-cpu-runner-tests
Two tests regularly show up on the long tail when testing MLIR.
This revision reduces their size.
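The reduction is easy to validate by hand: both tests multiply constant-filled matrices, so every output element is simply init + a·b·K (C is pre-filled with init, A with a, B with b, and K is the inner dimension). A minimal sketch of that arithmetic in Python, used here purely for illustration; the sizes and fill constants are taken from the diffs below:

```python
def matmul_element(init, a, b, k):
    """One element of C after C += A @ B with constant-filled operands."""
    return init + a * b * k

# sgemm_naive_codegen.mlir: A, B, C are all filled with 1.0. The inner
# dimension K drops from 64 to 16, so the FileCheck value drops 65 -> 17.
assert matmul_element(init=1.0, a=1.0, b=1.0, k=64) == 65.0
assert matmul_element(init=1.0, a=1.0, b=1.0, k=16) == 17.0

# linalg_integration_test.mlir: A is filled with 2.0, B with 1.0, C with 10.0.
# Only M and N shrink (10x16 * 16x10 becomes 2x16 * 16x2); K stays 16, so the
# expected value is still 42.0, which is presumably why no CHECK line changes.
assert matmul_element(init=10.0, a=2.0, b=1.0, k=16) == 42.0
```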
-rw-r--r-- | mlir/test/mlir-cpu-runner/linalg_integration_test.mlir | 20
-rw-r--r-- | mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir | 58
2 files changed, 39 insertions, 39 deletions
diff --git a/mlir/test/mlir-cpu-runner/linalg_integration_test.mlir b/mlir/test/mlir-cpu-runner/linalg_integration_test.mlir
index 9400792..43641fd 100644
--- a/mlir/test/mlir-cpu-runner/linalg_integration_test.mlir
+++ b/mlir/test/mlir-cpu-runner/linalg_integration_test.mlir
@@ -67,24 +67,24 @@ func @matmul() -> f32 {
   %c0 = constant 0 : index
   %c1 = constant 1 : index
   %c6 = constant 6 : index
   %c7 = constant 7 : index
-  %c10 = constant 10 : index
+  %c2 = constant 2 : index
   %c16 = constant 16 : index
-  %c100 = constant 100 : index
-  %c160 = constant 160 : index
+  %c4 = constant 4 : index
+  %c32 = constant 32 : index
   %f1 = constant 1.00000e+00 : f32
   %f2 = constant 2.00000e+00 : f32
   %f10 = constant 10.00000e+00 : f32
 
-  %bA = call @alloc_filled_f32(%c160, %f2) : (index, f32) -> (memref<?xi8>)
-  %bB = call @alloc_filled_f32(%c160, %f1) : (index, f32) -> (memref<?xi8>)
-  %bC = call @alloc_filled_f32(%c100, %f10) : (index, f32) -> (memref<?xi8>)
+  %bA = call @alloc_filled_f32(%c32, %f2) : (index, f32) -> (memref<?xi8>)
+  %bB = call @alloc_filled_f32(%c32, %f1) : (index, f32) -> (memref<?xi8>)
+  %bC = call @alloc_filled_f32(%c4, %f10) : (index, f32) -> (memref<?xi8>)
 
-  %A = view %bA[%c0][%c10, %c16] : memref<?xi8> to memref<?x?xf32>
-  %B = view %bB[%c0][%c16, %c10] : memref<?xi8> to memref<?x?xf32>
-  %C = view %bC[%c0][%c10, %c10] : memref<?xi8> to memref<?x?xf32>
+  %A = view %bA[%c0][%c2, %c16] : memref<?xi8> to memref<?x?xf32>
+  %B = view %bB[%c0][%c16, %c2] : memref<?xi8> to memref<?x?xf32>
+  %C = view %bC[%c0][%c2, %c2] : memref<?xi8> to memref<?x?xf32>
 
   linalg.matmul(%A, %B, %C) : memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>
-  %res = load %C[%c6, %c7] : memref<?x?xf32>
+  %res = load %C[%c0, %c1] : memref<?x?xf32>
   dealloc %bC : memref<?xi8>
   dealloc %bB : memref<?xi8>
diff --git a/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
index c65a3e3..ef2fe6c 100644
--- a/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm_naive_codegen.mlir
@@ -1,31 +1,31 @@
 // RUN: mlir-opt -convert-linalg-to-loops -lower-affine -convert-scf-to-std -convert-std-to-llvm %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext | FileCheck %s
 
 func @main() {
-  %A = alloc() : memref<64x64xf32>
-  %B = alloc() : memref<64x64xf32>
-  %C = alloc() : memref<64x64xf32>
+  %A = alloc() : memref<16x16xf32>
+  %B = alloc() : memref<16x16xf32>
+  %C = alloc() : memref<16x16xf32>
 
   %cf1 = constant 1.00000e+00 : f32
 
-  linalg.fill(%A, %cf1) : memref<64x64xf32>, f32
-  linalg.fill(%B, %cf1) : memref<64x64xf32>, f32
+  linalg.fill(%A, %cf1) : memref<16x16xf32>, f32
+  linalg.fill(%B, %cf1) : memref<16x16xf32>, f32
 
   %reps = constant 1 : index
 
   %t_start = call @rtclock() : () -> f64
   affine.for %arg0 = 0 to 5 {
-    linalg.fill(%C, %cf1) : memref<64x64xf32>, f32
-    call @sgemm_naive(%A, %B, %C) : (memref<64x64xf32>, memref<64x64xf32>, memref<64x64xf32>) -> ()
+    linalg.fill(%C, %cf1) : memref<16x16xf32>, f32
+    call @sgemm_naive(%A, %B, %C) : (memref<16x16xf32>, memref<16x16xf32>, memref<16x16xf32>) -> ()
   }
   %t_end = call @rtclock() : () -> f64
   %t = subf %t_end, %t_start : f64
 
-  %pC = memref_cast %C : memref<64x64xf32> to memref<*xf32>
+  %pC = memref_cast %C : memref<16x16xf32> to memref<*xf32>
   call @print_memref_f32(%pC) : (memref<*xf32>) -> ()
 
-  %M = dim %C, 0 : memref<64x64xf32>
-  %N = dim %C, 1 : memref<64x64xf32>
-  %K = dim %A, 1 : memref<64x64xf32>
+  %M = dim %C, 0 : memref<16x16xf32>
+  %N = dim %C, 1 : memref<16x16xf32>
+  %K = dim %A, 1 : memref<16x16xf32>
 
   %f1 = muli %M, %N : index
   %f2 = muli %f1, %K : index
@@ -34,38 +34,38 @@ func @main() {
   %c2 = constant 2 : index
   %f3 = muli %c2, %f2 : index
   %num_flops = muli %reps, %f3 : index
   %num_flops_i = index_cast %num_flops : index to i64
   %num_flops_f = sitofp %num_flops_i : i64 to f64
   %flops = divf %num_flops_f, %t : f64
   call @print_flops(%flops) : (f64) -> ()
 
   return
 }
-// CHECK: 65, 65, 65,
+// CHECK: 17, 17, 17,
 
-func @sgemm_naive(%arg0: memref<64x64xf32>, %arg1: memref<64x64xf32>, %arg2: memref<64x64xf32>) {
+func @sgemm_naive(%arg0: memref<16x16xf32>, %arg1: memref<16x16xf32>, %arg2: memref<16x16xf32>) {
   %c0 = constant 0 : index
-  affine.for %arg3 = 0 to 64 {
-    affine.for %arg4 = 0 to 64 {
+  affine.for %arg3 = 0 to 16 {
+    affine.for %arg4 = 0 to 16 {
       %m = alloc() : memref<1xf32>
-      %v = affine.load %arg2[%arg3, %arg4] : memref<64x64xf32>
+      %v = affine.load %arg2[%arg3, %arg4] : memref<16x16xf32>
       affine.store %v, %m[%c0] : memref<1xf32>
-      affine.for %arg5 = 0 to 64 {
-        %3 = affine.load %arg0[%arg3, %arg5] : memref<64x64xf32>
-        %4 = affine.load %arg1[%arg5, %arg4] : memref<64x64xf32>
+      affine.for %arg5 = 0 to 16 {
+        %3 = affine.load %arg0[%arg3, %arg5] : memref<16x16xf32>
+        %4 = affine.load %arg1[%arg5, %arg4] : memref<16x16xf32>
         %5 = affine.load %m[0] : memref<1xf32>
         %6 = mulf %3, %4 : f32
         %7 = addf %6, %5 : f32
         affine.store %7, %m[0] : memref<1xf32>
       }
       %s = affine.load %m[%c0] : memref<1xf32>
-      affine.store %s, %arg2[%arg3, %arg4] : memref<64x64xf32>
+      affine.store %s, %arg2[%arg3, %arg4] : memref<16x16xf32>
       dealloc %m : memref<1xf32>
     }
   }
   return
 }
 
 func @print_flops(f64)
 func @rtclock() -> f64
 func @print_memref_f32(memref<*xf32>)
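The flops report that the second test keeps intact counts reps * 2 * M * N * K floating-point operations (a multiply-accumulate counted as two ops) and divides by the elapsed time from rtclock(), all in f64. A minimal sketch of that bookkeeping in Python, where the elapsed time is a hypothetical stand-in for the rtclock() measurement:

```python
M = N = K = 16     # matrix dimensions after this commit (previously 64)
reps = 1           # %reps in the test
elapsed = 1.0e-5   # hypothetical elapsed seconds (t_end - t_start in the test)

num_flops = reps * 2 * M * N * K  # %num_flops: one mul + one add per iteration
flops = num_flops / elapsed       # %flops, printed via @print_flops
print(f"{flops:.3e} flops")
```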