-rw-r--r--  mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h          |  8 --------
-rwxr-xr-x  mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir | 41 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 40 insertions(+), 9 deletions(-)
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 1418217..eff1aca 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -465,9 +465,6 @@ private:
 
   /// Computes the assembled-size associated with the `l`-th level,
   /// given the assembled-size associated with the `(l-1)`-th level.
-  /// "Assembled-sizes" correspond to the (nominal) sizes of overhead
-  /// storage, as opposed to "level-sizes" which are the cardinality
-  /// of possible coordinates for that level.
   uint64_t assembledSize(uint64_t parentSz, uint64_t l) const {
     if (isCompressedLvl(l))
       return positions[l][parentSz];
@@ -764,11 +761,6 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
   // Note that none of the buffers can be reused because ownership
   // of the memory passed from clients is not necessarily transferred.
   // Therefore, all data is copied over into a new SparseTensorStorage.
-  //
-  // TODO: this needs to be generalized to all formats AND
-  //       we need a proper audit of e.g. double compressed
-  //       levels where some are not filled
-  //
   uint64_t trailCOOLen = 0, parentSz = 1, bufIdx = 0;
   for (uint64_t l = 0; l < lvlRank; l++) {
     if (!isUniqueLvl(l) && (isCompressedLvl(l) || isLooseCompressedLvl(l))) {
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
index 55585a7..c818c23 100755
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_d.mlir
@@ -23,6 +23,12 @@
 // REDEFINE: %{sparsifier_opts} = enable-runtime-library=false
 // RUN: %{compile} | %{run} | FileCheck %s
 
+#CCC = #sparse_tensor.encoding<{
+  map = (d0, d1, d2) -> (d0 : compressed, d1 : compressed, d2 : compressed),
+  posWidth = 64,
+  crdWidth = 32
+}>
+
 #BatchedCSR = #sparse_tensor.encoding<{
   map = (d0, d1, d2) -> (d0 : dense, d1 : dense, d2 : compressed),
   posWidth = 64,
@@ -35,7 +41,9 @@
   crdWidth = 32
 }>
-// Test with batched-CSR and CSR-dense.
+//
+// Test assembly operation with CCC, batched-CSR and CSR-dense.
+//
 module {
   //
   // Main driver.
   //
@@ -45,6 +53,31 @@ module {
     %f0 = arith.constant 0.0 : f32
 
     //
+    // Setup CCC.
+    //
+
+    %data0 = arith.constant dense<
+        [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 ]> : tensor<8xf32>
+    %pos00 = arith.constant dense<
+        [ 0, 3 ]> : tensor<2xi64>
+    %crd00 = arith.constant dense<
+        [ 0, 2, 3 ]> : tensor<3xi32>
+    %pos01 = arith.constant dense<
+        [ 0, 2, 4, 5 ]> : tensor<4xi64>
+    %crd01 = arith.constant dense<
+        [ 0, 1, 1, 2, 1 ]> : tensor<5xi32>
+    %pos02 = arith.constant dense<
+        [ 0, 2, 4, 5, 7, 8 ]> : tensor<6xi64>
+    %crd02 = arith.constant dense<
+        [ 0, 1, 0, 1, 0, 0, 1, 0 ]> : tensor<8xi32>
+
+    %s0 = sparse_tensor.assemble %data0, %pos00, %crd00, %pos01, %crd01, %pos02, %crd02 :
+         tensor<8xf32>,
+         tensor<2xi64>, tensor<3xi32>,
+         tensor<4xi64>, tensor<5xi32>,
+         tensor<6xi64>, tensor<8xi32> to tensor<4x3x2xf32, #CCC>
+
+    //
     // Setup BatchedCSR.
     //
 
@@ -75,10 +108,15 @@ module {
     //
     // Verify.
     //
+    // CHECK: ( ( ( 1, 2 ), ( 3, 4 ), ( 0, 0 ) ), ( ( 0, 0 ), ( 0, 0 ), ( 0, 0 ) ), ( ( 0, 0 ), ( 5, 0 ), ( 6, 7 ) ), ( ( 0, 0 ), ( 8, 0 ), ( 0, 0 ) ) )
     // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) )
     // CHECK: ( ( ( 1, 2 ), ( 0, 3 ), ( 4, 0 ) ), ( ( 5, 6 ), ( 0, 0 ), ( 0, 7 ) ), ( ( 8, 9 ), ( 10, 11 ), ( 12, 13 ) ), ( ( 14, 0 ), ( 0, 15 ), ( 0, 16 ) ) )
     //
+    %d0 = sparse_tensor.convert %s0 : tensor<4x3x2xf32, #CCC> to tensor<4x3x2xf32>
+    %v0 = vector.transfer_read %d0[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32>
+    vector.print %v0 : vector<4x3x2xf32>
+
     %d1 = sparse_tensor.convert %s1 : tensor<4x3x2xf32, #BatchedCSR> to tensor<4x3x2xf32>
     %v1 = vector.transfer_read %d1[%c0, %c0, %c0], %f0 : tensor<4x3x2xf32>, vector<4x3x2xf32>
     vector.print %v1 : vector<4x3x2xf32>
 
@@ -88,6 +126,7 @@ module {
     vector.print %v2 : vector<4x3x2xf32>
 
     // FIXME: doing this explicitly crashes runtime
+    // bufferization.dealloc_tensor %s0 : tensor<4x3x2xf32, #CCC>
     // bufferization.dealloc_tensor %s1 : tensor<4x3x2xf32, #BatchedCSR>
     // bufferization.dealloc_tensor %s2 : tensor<4x3x2xf32, #CSRDense>
     return
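Two reading aids follow; neither is part of the commit. First, the Storage.h hunks: the deleted doc-comment distinguished "assembled-sizes" (the nominal sizes of the overhead position/coordinate buffers) from "level-sizes" (the cardinality of possible coordinates at a level), and the surviving code computes the former as positions[l][parentSz] for a compressed level. Here is a minimal standalone C++ sketch, not the MLIR runtime itself, with the buffer contents copied verbatim from the new #CCC test case, that checks this chain across the three compressed levels:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      // Position/coordinate buffers copied verbatim from the CCC setup above.
      std::vector<std::vector<uint64_t>> positions = {
          {0, 3}, {0, 2, 4, 5}, {0, 2, 4, 5, 7, 8}};
      std::vector<std::vector<uint32_t>> coordinates = {
          {0, 2, 3}, {0, 1, 1, 2, 1}, {0, 1, 0, 1, 0, 0, 1, 0}};

      uint64_t parentSz = 1; // one implicit root above level 0
      for (uint64_t l = 0; l < 3; l++) {
        // A compressed level stores parentSz + 1 positions; the last one
        // is the assembled-size handed down to the next level.
        assert(positions[l].size() == parentSz + 1);
        uint64_t assembledSz = positions[l][parentSz];
        assert(coordinates[l].size() == assembledSz);
        parentSz = assembledSz;
      }
      assert(parentSz == 8); // equals the number of stored values in %data0
      return 0;
    }

For the test data the chain is 1 -> 3 -> 5 -> 8, which is exactly why the assemble op takes tensor<2xi64>/tensor<3xi32>, tensor<4xi64>/tensor<5xi32>, and tensor<6xi64>/tensor<8xi32> overhead buffers for eight stored values.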
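Second, the semantics of the new sparse_tensor.assemble test: expanding the three compressed levels back into a dense 4x3x2 array must reproduce the first CHECK line. Again a standalone sketch under the same assumptions (hand-copied buffers, not the runtime's actual conversion path):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      // Values and overhead buffers copied verbatim from the CCC setup above.
      std::vector<float> values = {1, 2, 3, 4, 5, 6, 7, 8};
      std::vector<uint64_t> pos0 = {0, 3};
      std::vector<uint32_t> crd0 = {0, 2, 3};
      std::vector<uint64_t> pos1 = {0, 2, 4, 5};
      std::vector<uint32_t> crd1 = {0, 1, 1, 2, 1};
      std::vector<uint64_t> pos2 = {0, 2, 4, 5, 7, 8};
      std::vector<uint32_t> crd2 = {0, 1, 0, 1, 0, 0, 1, 0};

      float dense[4][3][2] = {};
      // Segment [pos[l][p], pos[l][p+1]) lists the children of the p-th
      // stored node at level l-1; leaf indices address the values array.
      for (uint64_t i = pos0[0]; i < pos0[1]; i++)
        for (uint64_t j = pos1[i]; j < pos1[i + 1]; j++)
          for (uint64_t k = pos2[j]; k < pos2[j + 1]; k++)
            dense[crd0[i]][crd1[j]][crd2[k]] = values[k];

      // Print row by row; compare against the first CHECK line above.
      for (int d0 = 0; d0 < 4; d0++)
        for (int d1 = 0; d1 < 3; d1++)
          for (int d2 = 0; d2 < 2; d2++)
            std::printf("%g%s", dense[d0][d1][d2],
                        (d1 == 2 && d2 == 1) ? "\n" : " ");
      return 0;
    }

Walking the buffers places 1,2 at (0,0,*), 3,4 at (0,1,*), 5 at (2,1,0), 6,7 at (2,2,*), and 8 at (3,1,0), matching the expected CCC output pattern.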