diff options
author | Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> | 2023-12-08 09:22:19 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-08 09:22:19 -0800 |
commit | baa192ea6593499325655021f30d1379fda330e4 (patch) | |
tree | 53eb21d74efd46678fa6d66eadb78c35f26ce113 | |
parent | a539a090009378ecfcfbfaaa280eeac8f5b9d695 (diff) | |
download | llvm-baa192ea6593499325655021f30d1379fda330e4.zip llvm-baa192ea6593499325655021f30d1379fda330e4.tar.gz llvm-baa192ea6593499325655021f30d1379fda330e4.tar.bz2 |
[mlir][sparse] optimize memory loads to SSA values when generating sparse conv. (#74787)
3 files changed, 224 insertions, 304 deletions
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp index 75121b5..08d37b6 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp @@ -147,40 +147,30 @@ static Value genSparseReducedAffineCond(OpBuilder &builder, Location loc, // Helper functions that load/store into the position buffer for slice-driven // loops. -// The sliced pointer buffer is orgnized as: -// [size, curPtr] (two metadata) + [[pLo0, pLo1, pLo2, ...], -// [pHi0, pHi1, pHi2, ...], -// [pNx0, pNx1, pNx2, ...]] +// The sliced pointer buffer is organized as: +// [[pLo0, pLo1, pLo2, ...], +// [pHi0, pHi1, pHi2, ...], +// [pNx0, pNx1, pNx2, ...]] static Value allocSlicePosBuf(OpBuilder &builder, Location loc, Value tupleCnt) { Value bufSz = MULI(tupleCnt, C_IDX(kSliceIterWidth)); // Additional two metadata {memSize, idx} at head. - bufSz = ADDI(bufSz, C_IDX(2)); return genAlloca(builder, loc, bufSz, builder.getIndexType()); } -// TODO: We should use SSA value for it. -// Gets and sets metadata. -static Value loadSlicePosPtr(OpBuilder &builder, Location loc, Value sPosBuf) { - return genIndexLoad(builder, loc, sPosBuf, C_IDX(1)); -} -static void updateSlicePosPtr(OpBuilder &builder, Location loc, Value sPosBuf, - Value pPtr) { - builder.create<memref::StoreOp>(loc, pPtr, sPosBuf, C_IDX(1)); -} // Gets and sets position values for slice-driven loops. 
enum class SlicePosKind { kLo, kHi, kNext }; static Value getSlicePosIdx(OpBuilder &builder, Location loc, Value posBuf, Value tupleIdx, SlicePosKind posKind) { Value dim = builder.create<memref::DimOp>(loc, posBuf, C_IDX(0)); - Value tupleCnt = DIVUI(SUBI(dim, C_IDX(2)), C_IDX(kSliceIterWidth)); + Value tupleCnt = DIVUI(dim, C_IDX(kSliceIterWidth)); switch (posKind) { case SlicePosKind::kLo: - return ADDI(tupleIdx, C_IDX(2)); + return tupleIdx; case SlicePosKind::kHi: - return ADDI(tupleIdx, ADDI(tupleCnt, C_IDX(2))); + return ADDI(tupleIdx, tupleCnt); case SlicePosKind::kNext: - return ADDI(tupleIdx, ADDI(tupleCnt, ADDI(tupleCnt, C_IDX(2)))); + return ADDI(tupleIdx, MULI(tupleCnt, C_IDX(2))); } llvm_unreachable("unexpected kind"); } @@ -344,6 +334,9 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput, this->dependentLvlMap.assign( numTensors, std::vector<std::vector<std::pair<TensorLevel, unsigned>>>()); this->slicePosBuffer.assign(numTensors, std::vector<std::vector<Value>>()); + this->sliceTupleNxStartIdx.assign(numTensors, std::vector<Value>()); + this->sliceTupleFwdCnt.assign(numTensors, std::vector<Value>()); + this->trivialSlice.assign(numTensors, std::vector<bool>()); this->sliceMeta.assign( numTensors, std::vector<std::vector<std::pair<Value, unsigned>>>()); this->sliceStack.assign(numTensors, std::vector<SliceInfo>()); @@ -394,6 +387,9 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput, dependentLvlMap[tid].assign( lvlRank, std::vector<std::pair<TensorLevel, unsigned>>()); slicePosBuffer[tid].assign(lvlRank, std::vector<Value>()); + sliceTupleNxStartIdx[tid].assign(lvlRank, Value()); + sliceTupleFwdCnt[tid].assign(lvlRank, Value()); + trivialSlice[tid].assign(lvlRank, false); sliceMeta[tid].assign(lvlRank, std::vector<std::pair<Value, unsigned>>()); sliceStack[tid].emplace_back(/*minCrd=*/Value(), /*offset=*/Value(), /*isNonEmpty*/ Value(), @@ -806,6 +802,7 @@ std::optional<Value> 
LoopEmitter::genWhileLoopBody(OpBuilder &builder, assert(ivs.size() == 1); // Coord is the relative offset related to its parents. assert(sliceStack[tid].back().depth == 1 && "TODO: not yet implement"); + sliceTupleFwdCnt[tid][lvl] = SUBI(ivs[0], posits[tid][lvl]); // Update c = absOffset[lvl][depth] - absOffset[lvl][depth - 1] Value posit = ivs[0]; Value crdBuf = coordinatesBuffers[tid][lvl]; @@ -1324,6 +1321,12 @@ void LoopEmitter::enterTensorsAtDenseLvls( } else { posits[tid][lvl] = genAddress(builder, loc, tid, lvl, ADDI(info.offset, iv)); + Value fwdCnt = lvl == 0 || trivialSlice[tid][lvl] + ? C_IDX(0) + : sliceTupleFwdCnt[tid][lvl - 1]; + Value sz = sliceMeta[tid][lvl].back().first; + Value mul = MULI(fwdCnt, sz); + sliceTupleFwdCnt[tid][lvl] = ADDI(mul, iv); } levelReducedDep[tid][lvl]++; } else { @@ -1357,13 +1360,7 @@ void LoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc, assert(isDenseLT(lvlTypes[tid][lvl])); assert(*info.slicedOnLvl == lvl); (void)reduced; - // Resets slices pointers as the resolved slices are invalidated after we - // moves forward to the next slice. - invalidateSliceIterIdx(rewriter, loc, tid, lvl); info.minCrd = info.offset = info.isNonEmpty = Value(); - } else { - forwardsReducedSliceLevelTreeIt(rewriter, loc, tid, lvl, - constantIndex(rewriter, loc, 1)); } levelReducedDep[tid][lvl]--; } @@ -1443,54 +1440,6 @@ void LoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc, } } -void LoopEmitter::forwardsReducedSliceLevelTreeIt(OpBuilder &builder, - Location loc, TensorId tid, - Level rootLvl, Value fcnt) { - - auto stt = getSparseTensorType(tensors[tid]); - - // Finds a [Lvl, leafLvl) range, and all level in between are fully reduced - // sparse levels (but not resolved). Since we forward an iterator at higher - // level of the tree, the subtree need to be pruned. 
- Level leafLvl = rootLvl + 1; - while (leafLvl < stt.getLvlRank() && depFullyReduced(tid, leafLvl) && - !stt.isDenseLvl(leafLvl)) { - leafLvl++; - } - - Level curLvl = rootLvl + 1; - Value nxPosPtr = nullptr; - if (curLvl < leafLvl) { - assert(!isDenseLT(lvlTypes[tid][curLvl])); - // The first compressed level, setting up the position pointer for it. - Value sPosBuf = slicePosBuffer[tid][curLvl].back(); - // One step forwards in the parent level result in forwarding one `segment` - // in the child sparse level. - Value pPosPtr = loadSlicePosPtr(builder, loc, sPosBuf); // previous ptr - Value cPosPtr = ADDI(fcnt, pPosPtr); // current ptr - updateSlicePosPtr(builder, loc, sPosBuf, cPosPtr); - // Loads the position pointer start for next level. - nxPosPtr = - loadSlicePos(builder, loc, sPosBuf, cPosPtr, SlicePosKind::kNext); - curLvl++; - } - - // TODO: This is not always needed, but we did it unconditionally for now for - // simplicity. - // It is only needed when `curLvl` is forwarded without traversing its child - // level (e.g., the level is in a conjunctive lattices and got pruned), such - // that the position pointer is not forwarded inside the loop. 
- for (; curLvl < leafLvl; curLvl++) { - assert(nxPosPtr); - if (!isDenseLT(lvlTypes[tid][curLvl])) { - Value sPosBuf = slicePosBuffer[tid][curLvl].back(); - updateSlicePosPtr(builder, loc, sPosBuf, nxPosPtr); - nxPosPtr = - loadSlicePos(builder, loc, sPosBuf, nxPosPtr, SlicePosKind::kNext); - } - } -} - void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc, MutableArrayRef<Value> reduc) { const LoopInfo &loopInfo = loopStack.back(); @@ -1540,13 +1489,6 @@ void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc, forwarded = CMPI(eq, coords[tid][lvl], iv); operands.push_back(SELECT(forwarded, nxPos, pos)); } - { - OpBuilder::InsertionGuard guard(builder); - auto ifOp = builder.create<scf::IfOp>(loc, TypeRange{}, forwarded, - /*else=*/false); - builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - forwardsReducedSliceLevelTreeIt(builder, loc, tid, lvl, one); - } // The coordinate is invalid now. coords[tid][lvl] = nullptr; @@ -1916,8 +1858,7 @@ void LoopEmitter::genResolvedSliceBegin(OpBuilder &builder, Location loc, pHi = genIndexLoad(builder, loc, positionsBuffers[tid][lvl], ADDI(posits[tid][lvl - 1], c1)); } - // Fills out pIdxBuffer[tid][lvl][0] with [0, pLo, pHi] - updateSlicePosPtr(builder, loc, sPtrBuf, c0); + // Fills out pIdxBuffer[tid][lvl][0] with [pLo, pHi] updateSlicePos(builder, loc, sPtrBuf, pLo, c0, SlicePosKind::kLo); updateSlicePos(builder, loc, sPtrBuf, pHi, c0, SlicePosKind::kHi); // Slice over a resolved parent, we only need one pair of pos hi and lo to @@ -2056,8 +1997,6 @@ void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc, Value isNonEmpty = result[0]; Value minCrd = result[1]; // Two metadata [memSize, idx]. - // TODO: Can use an SSA value for these two metadata - updateSlicePosPtr(builder, loc, sPtrBuf, c0); // FIXME: we need the relative offset related to the base slice. 
Value absOffset = offsetFromMinCoord(builder, loc, minCrd, remSz, isNonEmpty); sliceStack[tid].emplace_back(minCrd, absOffset, isNonEmpty, result[2], lvl, @@ -2066,16 +2005,30 @@ void LoopEmitter::genUnResolvedSliceBegin(OpBuilder &builder, Location loc, bool LoopEmitter::genSliceBegin(OpBuilder &builder, Location loc, TensorId tid, Level lvl) { + Value curLvlIdx = C_IDX(0); if (depFullyReduced(tid, lvl)) { - // Do not need to prepare for slice driven loop on dense level after it is - // fully reduced. + if (lvl == 0 || trivialSlice[tid][lvl]) { + sliceTupleNxStartIdx[tid][lvl] = C_IDX(0); + } else { + if (isDenseLT(lvlTypes[tid][lvl])) { + sliceTupleNxStartIdx[tid][lvl] = sliceTupleNxStartIdx[tid][lvl - 1]; + } else { + assert(isCompressedLT(lvlTypes[tid][lvl])); + curLvlIdx = ADDI(sliceTupleNxStartIdx[tid][lvl - 1], + sliceTupleFwdCnt[0][lvl - 1]); + sliceTupleNxStartIdx[tid][lvl] = + loadSlicePos(builder, loc, slicePosBuffer[tid][lvl].back(), + curLvlIdx, SlicePosKind::kNext); + } + } if (isDenseLT(lvlTypes[tid][lvl])) return true; + + Value sPosBuf = slicePosBuffer[tid][lvl].back(); // If constraints on the tensor is fully resolved. We do not need to // generates slice begin any more, instead we fall back to TACO-based // algorithm to (co)iterates over the slice. - Value sPosBuf = slicePosBuffer[tid][lvl].back(); - Value tupleIdx = loadSlicePosPtr(builder, loc, sPosBuf); + Value tupleIdx = curLvlIdx; posits[tid][lvl] = loadSlicePos(builder, loc, sPosBuf, tupleIdx, SlicePosKind::kLo); highs[tid][lvl] = @@ -2134,23 +2087,16 @@ bool LoopEmitter::genSliceBegin(OpBuilder &builder, Location loc, TensorId tid, if (sliceInfo.isInitialTensor() || (lvl >= 1 && lvlFullyResolved(tid, lvl - 1))) { // First level or previous level has been full resolved. + trivialSlice[tid][lvl] = true; genResolvedSliceBegin(builder, loc, tid, lvl); } else { // The previous level has not been full resolved. 
+ trivialSlice[tid][lvl] = false; genUnResolvedSliceBegin(builder, loc, tid, lvl); } return false; } -void LoopEmitter::invalidateSliceIterIdx(OpBuilder &builder, Location loc, - TensorId tid, Level lvl) { - for (unsigned i = 0; i <= lvl; i++) { - if (!isDenseLT(lvlTypes[tid][i]) && !dependentLvlMap[tid][i].empty()) { - updateSlicePosPtr(builder, loc, slicePosBuffer[tid][i].back(), C_IDX(0)); - } - } -} - std::tuple<Value, Value, Value> LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc, TensorId tid, Level lvl) { @@ -2175,10 +2121,6 @@ LoopEmitter::genSliceNextInduction(OpBuilder &builder, Location loc, // isNonEmpty = false; // Value absOffset = info.offset; - // Resets slices pointers as the resolved slices are invalidated after we - // moves forward to the next slice. - invalidateSliceIterIdx(builder, loc, tid, lvl); - SmallVector<Value, 3> reduc = {info.minCrd, info.isNonEmpty, absOffset}; Value sPtrBuf = slicePosBuffer[tid][lvl][info.depth - 1]; Value fastPathP = CMPI(ugt, info.minCrd, absOffset); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h index 5e51cb2..fa8b007 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h @@ -453,11 +453,6 @@ private: return tid < lvlTypes.size() && lvl < lvlTypes[tid].size(); } - /// Forwards the (conceptual) "tree iterator" when iterating over a fully - /// reduced slice created by index-reduction. - void forwardsReducedSliceLevelTreeIt(OpBuilder &builder, Location loc, - TensorId tid, Level lvl, Value fcnt); - /// Prepares loop for iterating over `tensor[lvl]`, under the assumption /// that `tensor[0...lvl-1]` loops have already been set up. 
void prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc, @@ -610,11 +605,6 @@ private: void genUnResolvedSliceBegin(OpBuilder &builder, Location loc, TensorId tid, Level lvl); - /// Invalidates the index kept in slice postion buffers (by setting it to - /// zero). - /// TODO: We should instead use an SSA value for the index. - void invalidateSliceIterIdx(OpBuilder &builder, Location loc, TensorId tid, - Level lvl); /// Generates code to get the first non-empty slice of tid on lvl. /// return true if has already been resolved. bool genSliceBegin(OpBuilder &builder, Location loc, TensorId tid, Level lvl); @@ -683,6 +673,9 @@ private: // But they always starts with the first pidx pointing to coord > slice.offset // to avoid iteration from the beginning. std::vector<std::vector<std::vector<Value>>> slicePosBuffer; + std::vector<std::vector<Value>> sliceTupleNxStartIdx; + std::vector<std::vector<Value>> sliceTupleFwdCnt; + std::vector<std::vector<bool>> trivialSlice; // The (size, stride) for each conceptual slice used for index reduction // loops. 
diff --git a/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir b/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir index 02cc5d1..a3c1e76 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir @@ -12,241 +12,226 @@ // CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32>) -> tensor<6x6xi32, #sparse> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant true // CHECK-DAG: %[[VAL_3:.*]] = arith.constant -2 : index -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 3 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 5 : index -// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_10:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_11:.*]] = arith.constant 0 : i32 -// CHECK-DAG: %[[VAL_12:.*]] = arith.constant false -// CHECK-DAG: %[[VAL_13:.*]] = tensor.empty() : tensor<6x6xi32, #sparse> -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse> to memref<?xindex> -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse> to memref<?xindex> -// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse> to memref<?xindex> -// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse> to memref<?xindex> -// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xi32, #sparse> to memref<?xi32> -// CHECK: %[[VAL_19:.*]] = memref.alloca() : memref<11xindex> -// CHECK: %[[VAL_20:.*]] = memref.alloca() : memref<5xindex> -// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<?xindex> 
-// CHECK: memref.store %[[VAL_10]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<5xindex> -// CHECK: memref.store %[[VAL_10]], %[[VAL_20]]{{\[}}%[[VAL_9]]] : memref<5xindex> -// CHECK: memref.store %[[VAL_21]], %[[VAL_20]]{{\[}}%[[VAL_6]]] : memref<5xindex> -// CHECK: %[[VAL_22:.*]] = arith.cmpi ugt, %[[VAL_21]], %[[VAL_10]] : index -// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_10]]] : memref<?xindex> -// CHECK: %[[VAL_24:.*]] = arith.cmpi uge, %[[VAL_23]], %[[VAL_6]] : index -// CHECK: %[[VAL_25:.*]] = arith.andi %[[VAL_22]], %[[VAL_24]] : i1 -// CHECK: %[[VAL_26:.*]] = arith.addi %[[VAL_23]], %[[VAL_3]] : index -// CHECK: %[[VAL_27:.*]] = arith.select %[[VAL_25]], %[[VAL_26]], %[[VAL_10]] : index -// CHECK: %[[VAL_28:.*]]:3 = scf.while (%[[VAL_29:.*]] = %[[VAL_22]], %[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_27]], %[[VAL_32:.*]] = %[[VAL_13]]) : (i1, index, index, tensor<6x6xi32, #sparse>) -> (index, index, tensor<6x6xi32, #sparse>) { -// CHECK: scf.condition(%[[VAL_29]]) %[[VAL_30]], %[[VAL_31]], %[[VAL_32]] : index, index, tensor<6x6xi32, #sparse> +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[VAL_10:.*]] = arith.constant false +// CHECK-DAG: %[[VAL_11:.*]] = tensor.empty() : tensor<6x6xi32, #sparse> +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse> to memref<?xindex> +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse> to memref<?xindex> +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse> to memref<?xindex> +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse> to memref<?xindex> +// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xi32, #sparse> to memref<?xi32> +// CHECK-DAG: 
%[[VAL_17:.*]] = memref.alloca() : memref<9xindex> +// CHECK-DAG: %[[VAL_18:.*]] = memref.alloca() : memref<3xindex> +// CHECK-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<?xindex> +// CHECK: memref.store %[[VAL_8]], %[[VAL_18]]{{\[}}%[[VAL_8]]] : memref<3xindex> +// CHECK: memref.store %[[VAL_19]], %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<3xindex> +// CHECK: %[[VAL_20:.*]] = arith.cmpi ugt, %[[VAL_19]], %[[VAL_8]] : index +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_8]]] : memref<?xindex> +// CHECK: %[[VAL_22:.*]] = arith.cmpi uge, %[[VAL_21]], %[[VAL_6]] : index +// CHECK: %[[VAL_23:.*]] = arith.andi %[[VAL_20]], %[[VAL_22]] : i1 +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_3]] : index +// CHECK: %[[VAL_25:.*]] = arith.select %[[VAL_23]], %[[VAL_24]], %[[VAL_8]] : index +// CHECK: %[[VAL_26:.*]]:3 = scf.while (%[[VAL_27:.*]] = %[[VAL_20]], %[[VAL_28:.*]] = %[[VAL_21]], %[[VAL_29:.*]] = %[[VAL_25]], %[[VAL_30:.*]] = %[[VAL_11]]) : (i1, index, index, tensor<6x6xi32, #sparse>) -> (index, index, tensor<6x6xi32, #sparse>) { +// CHECK: scf.condition(%[[VAL_27]]) %[[VAL_28]], %[[VAL_29]], %[[VAL_30]] : index, index, tensor<6x6xi32, #sparse> // CHECK: } do { -// CHECK: ^bb0(%[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index, %[[VAL_35:.*]]: tensor<6x6xi32, #sparse>): -// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_9]]] : memref<5xindex> -// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_6]]] : memref<5xindex> -// CHECK: memref.store %[[VAL_10]], %[[VAL_20]]{{\[}}%[[VAL_4]]] : memref<5xindex> -// CHECK: %[[VAL_38:.*]] = arith.addi %[[VAL_34]], %[[VAL_6]] : index -// CHECK: %[[VAL_39:.*]]:5 = scf.while (%[[VAL_40:.*]] = %[[VAL_36]], %[[VAL_41:.*]] = %[[VAL_12]], %[[VAL_42:.*]] = %[[VAL_5]], %[[VAL_43:.*]] = %[[VAL_10]], %[[VAL_44:.*]] = %[[VAL_10]]) : (index, i1, index, index, index) -> (index, i1, index, index, index) { -// CHECK: %[[VAL_45:.*]] = arith.cmpi ult, %[[VAL_40]], %[[VAL_37]] : 
index -// CHECK: %[[VAL_46:.*]] = scf.if %[[VAL_45]] -> (i1) { -// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_40]]] : memref<?xindex> -// CHECK: %[[VAL_48:.*]] = arith.cmpi ult, %[[VAL_47]], %[[VAL_38]] : index -// CHECK: scf.yield %[[VAL_48]] : i1 +// CHECK: ^bb0(%[[VAL_31:.*]]: index, %[[VAL_32:.*]]: index, %[[VAL_33:.*]]: tensor<6x6xi32, #sparse>): +// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_8]]] : memref<3xindex> +// CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<3xindex> +// CHECK: memref.store %[[VAL_8]], %[[VAL_18]]{{\[}}%[[VAL_4]]] : memref<3xindex> +// CHECK: %[[VAL_36:.*]] = arith.addi %[[VAL_32]], %[[VAL_6]] : index +// CHECK: %[[VAL_37:.*]]:5 = scf.while (%[[VAL_38:.*]] = %[[VAL_34]], %[[VAL_39:.*]] = %[[VAL_10]], %[[VAL_40:.*]] = %[[VAL_5]], %[[VAL_41:.*]] = %[[VAL_8]], %[[VAL_42:.*]] = %[[VAL_8]]) : (index, i1, index, index, index) -> (index, i1, index, index, index) { +// CHECK: %[[VAL_43:.*]] = arith.cmpi ult, %[[VAL_38]], %[[VAL_35]] : index +// CHECK: %[[VAL_44:.*]] = scf.if %[[VAL_43]] -> (i1) { +// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_38]]] : memref<?xindex> +// CHECK: %[[VAL_46:.*]] = arith.cmpi ult, %[[VAL_45]], %[[VAL_36]] : index +// CHECK: scf.yield %[[VAL_46]] : i1 // CHECK: } else { -// CHECK: scf.yield %[[VAL_12]] : i1 +// CHECK: scf.yield %[[VAL_10]] : i1 // CHECK: } -// CHECK: scf.condition(%[[VAL_46]]) %[[VAL_40]], %[[VAL_41]], %[[VAL_42]], %[[VAL_43]], %[[VAL_44]] : index, i1, index, index, index +// CHECK: scf.condition(%[[VAL_44]]) %[[VAL_38]], %[[VAL_39]], %[[VAL_40]], %[[VAL_41]], %[[VAL_42]] : index, i1, index, index, index // CHECK: } do { -// CHECK: ^bb0(%[[VAL_49:.*]]: index, %[[VAL_50:.*]]: i1, %[[VAL_51:.*]]: index, %[[VAL_52:.*]]: index, %[[VAL_53:.*]]: index): -// CHECK: %[[VAL_54:.*]] = arith.addi %[[VAL_49]], %[[VAL_7]] : index -// CHECK: %[[VAL_55:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_49]]] : memref<?xindex> -// CHECK: 
%[[VAL_56:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_54]]] : memref<?xindex> -// CHECK: %[[VAL_57:.*]] = arith.cmpi ult, %[[VAL_55]], %[[VAL_56]] : index -// CHECK: %[[VAL_58:.*]] = arith.ori %[[VAL_57]], %[[VAL_50]] : i1 -// CHECK: %[[VAL_59:.*]] = scf.if %[[VAL_57]] -> (index) { -// CHECK: %[[VAL_60:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_55]]] : memref<?xindex> -// CHECK: %[[VAL_61:.*]] = arith.cmpi ult, %[[VAL_60]], %[[VAL_51]] : index -// CHECK: %[[VAL_62:.*]] = arith.select %[[VAL_61]], %[[VAL_60]], %[[VAL_51]] : index -// CHECK: scf.yield %[[VAL_62]] : index +// CHECK: ^bb0(%[[VAL_47:.*]]: index, %[[VAL_48:.*]]: i1, %[[VAL_49:.*]]: index, %[[VAL_50:.*]]: index, %[[VAL_51:.*]]: index): +// CHECK: %[[VAL_52:.*]] = arith.addi %[[VAL_47]], %[[VAL_7]] : index +// CHECK: %[[VAL_53:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_47]]] : memref<?xindex> +// CHECK: %[[VAL_54:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_52]]] : memref<?xindex> +// CHECK: %[[VAL_55:.*]] = arith.cmpi ult, %[[VAL_53]], %[[VAL_54]] : index +// CHECK: %[[VAL_56:.*]] = arith.ori %[[VAL_55]], %[[VAL_48]] : i1 +// CHECK: %[[VAL_57:.*]] = scf.if %[[VAL_55]] -> (index) { +// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_53]]] : memref<?xindex> +// CHECK: %[[VAL_59:.*]] = arith.cmpi ult, %[[VAL_58]], %[[VAL_49]] : index +// CHECK: %[[VAL_60:.*]] = arith.select %[[VAL_59]], %[[VAL_58]], %[[VAL_49]] : index +// CHECK: scf.yield %[[VAL_60]] : index // CHECK: } else { -// CHECK: scf.yield %[[VAL_51]] : index +// CHECK: scf.yield %[[VAL_49]] : index // CHECK: } -// CHECK: %[[VAL_63:.*]] = arith.addi %[[VAL_52]], %[[VAL_9]] : index -// CHECK: memref.store %[[VAL_55]], %[[VAL_19]]{{\[}}%[[VAL_63]]] : memref<11xindex> -// CHECK: %[[VAL_64:.*]] = arith.addi %[[VAL_52]], %[[VAL_8]] : index -// CHECK: memref.store %[[VAL_56]], %[[VAL_19]]{{\[}}%[[VAL_64]]] : memref<11xindex> -// CHECK: %[[VAL_65:.*]] = arith.addi %[[VAL_52]], %[[VAL_7]] : index -// CHECK: %[[VAL_66:.*]] = arith.addi %[[VAL_53]], 
%[[VAL_7]] : index -// CHECK: scf.yield %[[VAL_54]], %[[VAL_58]], %[[VAL_59]], %[[VAL_65]], %[[VAL_66]] : index, i1, index, index, index +// CHECK: memref.store %[[VAL_53]], %[[VAL_17]]{{\[}}%[[VAL_50]]] : memref<9xindex> +// CHECK: %[[VAL_61:.*]] = arith.addi %[[VAL_50]], %[[VAL_6]] : index +// CHECK: memref.store %[[VAL_54]], %[[VAL_17]]{{\[}}%[[VAL_61]]] : memref<9xindex> +// CHECK: %[[VAL_62:.*]] = arith.addi %[[VAL_50]], %[[VAL_7]] : index +// CHECK: %[[VAL_63:.*]] = arith.addi %[[VAL_51]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_52]], %[[VAL_56]], %[[VAL_57]], %[[VAL_62]], %[[VAL_63]] : index, i1, index, index, index // CHECK: } -// CHECK: memref.store %[[VAL_10]], %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<11xindex> -// CHECK: %[[VAL_67:.*]] = arith.cmpi uge, %[[VAL_68:.*]]#2, %[[VAL_6]] : index -// CHECK: %[[VAL_69:.*]] = arith.andi %[[VAL_68]]#1, %[[VAL_67]] : i1 -// CHECK: %[[VAL_70:.*]] = arith.addi %[[VAL_68]]#2, %[[VAL_3]] : index -// CHECK: %[[VAL_71:.*]] = arith.select %[[VAL_69]], %[[VAL_70]], %[[VAL_10]] : index -// CHECK: %[[VAL_72:.*]]:3 = scf.while (%[[VAL_73:.*]] = %[[VAL_68]]#1, %[[VAL_74:.*]] = %[[VAL_68]]#2, %[[VAL_75:.*]] = %[[VAL_71]], %[[VAL_76:.*]] = %[[VAL_35]]) : (i1, index, index, tensor<6x6xi32, #sparse>) -> (index, index, tensor<6x6xi32, #sparse>) { -// CHECK: scf.condition(%[[VAL_73]]) %[[VAL_74]], %[[VAL_75]], %[[VAL_76]] : index, index, tensor<6x6xi32, #sparse> +// CHECK: %[[VAL_64:.*]] = arith.cmpi uge, %[[VAL_65:.*]]#2, %[[VAL_6]] : index +// CHECK: %[[VAL_66:.*]] = arith.andi %[[VAL_65]]#1, %[[VAL_64]] : i1 +// CHECK: %[[VAL_67:.*]] = arith.addi %[[VAL_65]]#2, %[[VAL_3]] : index +// CHECK: %[[VAL_68:.*]] = arith.select %[[VAL_66]], %[[VAL_67]], %[[VAL_8]] : index +// CHECK: %[[VAL_69:.*]]:3 = scf.while (%[[VAL_70:.*]] = %[[VAL_65]]#1, %[[VAL_71:.*]] = %[[VAL_65]]#2, %[[VAL_72:.*]] = %[[VAL_68]], %[[VAL_73:.*]] = %[[VAL_33]]) : (i1, index, index, tensor<6x6xi32, #sparse>) -> (index, index, tensor<6x6xi32, #sparse>) { +// 
CHECK: scf.condition(%[[VAL_70]]) %[[VAL_71]], %[[VAL_72]], %[[VAL_73]] : index, index, tensor<6x6xi32, #sparse> // CHECK: } do { -// CHECK: ^bb0(%[[VAL_77:.*]]: index, %[[VAL_78:.*]]: index, %[[VAL_79:.*]]: tensor<6x6xi32, #sparse>): -// CHECK: %[[VAL_80:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<5xindex> -// CHECK: %[[VAL_81:.*]] = arith.addi %[[VAL_80]], %[[VAL_9]] : index -// CHECK: %[[VAL_82:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_81]]] : memref<5xindex> -// CHECK: %[[VAL_83:.*]] = arith.addi %[[VAL_80]], %[[VAL_6]] : index -// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_83]]] : memref<5xindex> -// CHECK: %[[VAL_85:.*]]:3 = scf.while (%[[VAL_86:.*]] = %[[VAL_82]], %[[VAL_87:.*]] = %[[VAL_11]], %[[VAL_88:.*]] = %[[VAL_12]]) : (index, i32, i1) -> (index, i32, i1) { -// CHECK: %[[VAL_89:.*]] = arith.cmpi ult, %[[VAL_86]], %[[VAL_84]] : index -// CHECK: %[[VAL_90:.*]] = scf.if %[[VAL_89]] -> (i1) { -// CHECK: %[[VAL_91:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_86]]] : memref<?xindex> -// CHECK: %[[VAL_92:.*]] = arith.cmpi ult, %[[VAL_91]], %[[VAL_38]] : index -// CHECK: scf.yield %[[VAL_92]] : i1 +// CHECK: ^bb0(%[[VAL_74:.*]]: index, %[[VAL_75:.*]]: index, %[[VAL_76:.*]]: tensor<6x6xi32, #sparse>): +// CHECK: %[[VAL_77:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_8]]] : memref<3xindex> +// CHECK: %[[VAL_78:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<3xindex> +// CHECK: %[[VAL_79:.*]]:3 = scf.while (%[[VAL_80:.*]] = %[[VAL_77]], %[[VAL_81:.*]] = %[[VAL_9]], %[[VAL_82:.*]] = %[[VAL_10]]) : (index, i32, i1) -> (index, i32, i1) { +// CHECK: %[[VAL_83:.*]] = arith.cmpi ult, %[[VAL_80]], %[[VAL_78]] : index +// CHECK: %[[VAL_84:.*]] = scf.if %[[VAL_83]] -> (i1) { +// CHECK: %[[VAL_85:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_80]]] : memref<?xindex> +// CHECK: %[[VAL_86:.*]] = arith.cmpi ult, %[[VAL_85]], %[[VAL_36]] : index +// CHECK: scf.yield %[[VAL_86]] : i1 // CHECK: } else { -// CHECK: scf.yield %[[VAL_12]] : i1 +// 
CHECK: scf.yield %[[VAL_10]] : i1 // CHECK: } -// CHECK: scf.condition(%[[VAL_90]]) %[[VAL_86]], %[[VAL_87]], %[[VAL_88]] : index, i32, i1 +// CHECK: scf.condition(%[[VAL_84]]) %[[VAL_80]], %[[VAL_81]], %[[VAL_82]] : index, i32, i1 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_93:.*]]: index, %[[VAL_94:.*]]: i32, %[[VAL_95:.*]]: i1): -// CHECK: %[[VAL_96:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_93]]] : memref<?xindex> -// CHECK: %[[VAL_97:.*]] = arith.subi %[[VAL_96]], %[[VAL_34]] : index -// CHECK: %[[VAL_98:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<11xindex> -// CHECK: %[[VAL_99:.*]] = arith.addi %[[VAL_98]], %[[VAL_9]] : index -// CHECK: %[[VAL_100:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_99]]] : memref<11xindex> -// CHECK: %[[VAL_101:.*]] = arith.addi %[[VAL_98]], %[[VAL_8]] : index -// CHECK: %[[VAL_102:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_101]]] : memref<11xindex> -// CHECK: %[[VAL_103:.*]] = arith.addi %[[VAL_78]], %[[VAL_6]] : index -// CHECK: %[[VAL_104:.*]]:2 = scf.while (%[[VAL_105:.*]] = %[[VAL_100]], %[[VAL_106:.*]] = %[[VAL_94]]) : (index, i32) -> (index, i32) { -// CHECK: %[[VAL_107:.*]] = arith.cmpi ult, %[[VAL_105]], %[[VAL_102]] : index -// CHECK: %[[VAL_108:.*]] = scf.if %[[VAL_107]] -> (i1) { -// CHECK: %[[VAL_109:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_105]]] : memref<?xindex> -// CHECK: %[[VAL_110:.*]] = arith.cmpi ult, %[[VAL_109]], %[[VAL_103]] : index -// CHECK: scf.yield %[[VAL_110]] : i1 +// CHECK: ^bb0(%[[VAL_87:.*]]: index, %[[VAL_88:.*]]: i32, %[[VAL_89:.*]]: i1): +// CHECK: %[[VAL_90:.*]] = arith.subi %[[VAL_87]], %[[VAL_77]] : index +// CHECK: %[[VAL_91:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_87]]] : memref<?xindex> +// CHECK: %[[VAL_92:.*]] = arith.subi %[[VAL_91]], %[[VAL_32]] : index +// CHECK: %[[VAL_93:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_90]]] : memref<9xindex> +// CHECK: %[[VAL_94:.*]] = arith.addi %[[VAL_90]], %[[VAL_6]] : index +// CHECK: %[[VAL_95:.*]] = memref.load 
%[[VAL_17]]{{\[}}%[[VAL_94]]] : memref<9xindex> +// CHECK: %[[VAL_96:.*]] = arith.addi %[[VAL_75]], %[[VAL_6]] : index +// CHECK: %[[VAL_97:.*]]:2 = scf.while (%[[VAL_98:.*]] = %[[VAL_93]], %[[VAL_99:.*]] = %[[VAL_88]]) : (index, i32) -> (index, i32) { +// CHECK: %[[VAL_100:.*]] = arith.cmpi ult, %[[VAL_98]], %[[VAL_95]] : index +// CHECK: %[[VAL_101:.*]] = scf.if %[[VAL_100]] -> (i1) { +// CHECK: %[[VAL_102:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_98]]] : memref<?xindex> +// CHECK: %[[VAL_103:.*]] = arith.cmpi ult, %[[VAL_102]], %[[VAL_96]] : index +// CHECK: scf.yield %[[VAL_103]] : i1 // CHECK: } else { -// CHECK: scf.yield %[[VAL_12]] : i1 +// CHECK: scf.yield %[[VAL_10]] : i1 // CHECK: } -// CHECK: scf.condition(%[[VAL_108]]) %[[VAL_105]], %[[VAL_106]] : index, i32 +// CHECK: scf.condition(%[[VAL_101]]) %[[VAL_98]], %[[VAL_99]] : index, i32 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_111:.*]]: index, %[[VAL_112:.*]]: i32): -// CHECK: %[[VAL_113:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_111]]] : memref<?xindex> -// CHECK: %[[VAL_114:.*]] = arith.subi %[[VAL_113]], %[[VAL_78]] : index -// CHECK: %[[VAL_115:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_111]]] : memref<?xi32> -// CHECK: %[[VAL_116:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_97]], %[[VAL_114]]] : tensor<3x3xi32> -// CHECK: %[[VAL_117:.*]] = arith.muli %[[VAL_115]], %[[VAL_116]] : i32 -// CHECK: %[[VAL_118:.*]] = arith.addi %[[VAL_112]], %[[VAL_117]] : i32 -// CHECK: %[[VAL_119:.*]] = arith.addi %[[VAL_111]], %[[VAL_7]] : index -// CHECK: scf.yield %[[VAL_119]], %[[VAL_118]] : index, i32 +// CHECK: ^bb0(%[[VAL_104:.*]]: index, %[[VAL_105:.*]]: i32): +// CHECK: %[[VAL_106:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_104]]] : memref<?xindex> +// CHECK: %[[VAL_107:.*]] = arith.subi %[[VAL_106]], %[[VAL_75]] : index +// CHECK: %[[VAL_108:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_104]]] : memref<?xi32> +// CHECK: %[[VAL_109:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_92]], %[[VAL_107]]] : tensor<3x3xi32> 
+// CHECK: %[[VAL_110:.*]] = arith.muli %[[VAL_108]], %[[VAL_109]] : i32 +// CHECK: %[[VAL_111:.*]] = arith.addi %[[VAL_105]], %[[VAL_110]] : i32 +// CHECK: %[[VAL_112:.*]] = arith.addi %[[VAL_104]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_112]], %[[VAL_111]] : index, i32 // CHECK: } -// CHECK: %[[VAL_120:.*]] = arith.addi %[[VAL_93]], %[[VAL_7]] : index -// CHECK: %[[VAL_121:.*]] = arith.addi %[[VAL_98]], %[[VAL_7]] : index -// CHECK: memref.store %[[VAL_121]], %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<11xindex> -// CHECK: scf.yield %[[VAL_120]], %[[VAL_122:.*]]#1, %[[VAL_2]] : index, i32, i1 +// CHECK: %[[VAL_113:.*]] = arith.addi %[[VAL_87]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_113]], %[[VAL_114:.*]]#1, %[[VAL_2]] : index, i32, i1 // CHECK: } -// CHECK: %[[VAL_123:.*]] = scf.if %[[VAL_124:.*]]#2 -> (tensor<6x6xi32, #sparse>) { -// CHECK: %[[VAL_125:.*]] = sparse_tensor.insert %[[VAL_124]]#1 into %[[VAL_79]]{{\[}}%[[VAL_34]], %[[VAL_78]]] : tensor<6x6xi32, #sparse> -// CHECK: scf.yield %[[VAL_125]] : tensor<6x6xi32, #sparse> +// CHECK: %[[VAL_115:.*]] = scf.if %[[VAL_116:.*]]#2 -> (tensor<6x6xi32, #sparse>) { +// CHECK: %[[VAL_117:.*]] = sparse_tensor.insert %[[VAL_116]]#1 into %[[VAL_76]]{{\[}}%[[VAL_32]], %[[VAL_75]]] : tensor<6x6xi32, #sparse> +// CHECK: scf.yield %[[VAL_117]] : tensor<6x6xi32, #sparse> // CHECK: } else { -// CHECK: scf.yield %[[VAL_79]] : tensor<6x6xi32, #sparse> +// CHECK: scf.yield %[[VAL_76]] : tensor<6x6xi32, #sparse> // CHECK: } -// CHECK: memref.store %[[VAL_10]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<5xindex> -// CHECK: memref.store %[[VAL_10]], %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<11xindex> -// CHECK: %[[VAL_126:.*]] = arith.cmpi ugt, %[[VAL_77]], %[[VAL_78]] : index -// CHECK: %[[VAL_127:.*]]:3 = scf.if %[[VAL_126]] -> (index, i1, index) { -// CHECK: %[[VAL_128:.*]] = arith.addi %[[VAL_78]], %[[VAL_7]] : index -// CHECK: scf.yield %[[VAL_77]], %[[VAL_2]], %[[VAL_128]] : index, i1, index +// CHECK: %[[VAL_118:.*]] = 
arith.cmpi ugt, %[[VAL_74]], %[[VAL_75]] : index +// CHECK: %[[VAL_119:.*]]:3 = scf.if %[[VAL_118]] -> (index, i1, index) { +// CHECK: %[[VAL_120:.*]] = arith.addi %[[VAL_75]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_74]], %[[VAL_2]], %[[VAL_120]] : index, i1, index // CHECK: } else { -// CHECK: %[[VAL_129:.*]]:2 = scf.for %[[VAL_130:.*]] = %[[VAL_10]] to %[[VAL_68]]#3 step %[[VAL_7]] iter_args(%[[VAL_131:.*]] = %[[VAL_5]], %[[VAL_132:.*]] = %[[VAL_12]]) -> (index, i1) { -// CHECK: %[[VAL_133:.*]] = arith.addi %[[VAL_130]], %[[VAL_9]] : index -// CHECK: %[[VAL_134:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_133]]] : memref<11xindex> -// CHECK: %[[VAL_135:.*]] = arith.addi %[[VAL_130]], %[[VAL_8]] : index -// CHECK: %[[VAL_136:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_135]]] : memref<11xindex> -// CHECK: %[[VAL_137:.*]] = arith.cmpi ult, %[[VAL_134]], %[[VAL_136]] : index -// CHECK: %[[VAL_138:.*]] = scf.if %[[VAL_137]] -> (index) { -// CHECK: %[[VAL_139:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_134]]] : memref<?xindex> -// CHECK: %[[VAL_140:.*]] = arith.cmpi eq, %[[VAL_139]], %[[VAL_77]] : index -// CHECK: %[[VAL_141:.*]] = scf.if %[[VAL_140]] -> (index) { -// CHECK: %[[VAL_142:.*]] = arith.addi %[[VAL_134]], %[[VAL_7]] : index -// CHECK: memref.store %[[VAL_142]], %[[VAL_19]]{{\[}}%[[VAL_133]]] : memref<11xindex> -// CHECK: scf.yield %[[VAL_142]] : index +// CHECK: %[[VAL_121:.*]]:2 = scf.for %[[VAL_122:.*]] = %[[VAL_8]] to %[[VAL_65]]#3 step %[[VAL_7]] iter_args(%[[VAL_123:.*]] = %[[VAL_5]], %[[VAL_124:.*]] = %[[VAL_10]]) -> (index, i1) { +// CHECK: %[[VAL_125:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_122]]] : memref<9xindex> +// CHECK: %[[VAL_126:.*]] = arith.addi %[[VAL_122]], %[[VAL_6]] : index +// CHECK: %[[VAL_127:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_126]]] : memref<9xindex> +// CHECK: %[[VAL_128:.*]] = arith.cmpi ult, %[[VAL_125]], %[[VAL_127]] : index +// CHECK: %[[VAL_129:.*]] = scf.if %[[VAL_128]] -> (index) { +// CHECK: %[[VAL_130:.*]] = 
memref.load %[[VAL_15]]{{\[}}%[[VAL_125]]] : memref<?xindex> +// CHECK: %[[VAL_131:.*]] = arith.cmpi eq, %[[VAL_130]], %[[VAL_74]] : index +// CHECK: %[[VAL_132:.*]] = scf.if %[[VAL_131]] -> (index) { +// CHECK: %[[VAL_133:.*]] = arith.addi %[[VAL_125]], %[[VAL_7]] : index +// CHECK: memref.store %[[VAL_133]], %[[VAL_17]]{{\[}}%[[VAL_122]]] : memref<9xindex> +// CHECK: scf.yield %[[VAL_133]] : index // CHECK: } else { -// CHECK: scf.yield %[[VAL_134]] : index +// CHECK: scf.yield %[[VAL_125]] : index // CHECK: } -// CHECK: scf.yield %[[VAL_141]] : index +// CHECK: scf.yield %[[VAL_132]] : index // CHECK: } else { -// CHECK: scf.yield %[[VAL_134]] : index +// CHECK: scf.yield %[[VAL_125]] : index // CHECK: } -// CHECK: %[[VAL_143:.*]] = arith.cmpi ult, %[[VAL_138]], %[[VAL_136]] : index -// CHECK: %[[VAL_144:.*]] = scf.if %[[VAL_143]] -> (index) { -// CHECK: %[[VAL_145:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_138]]] : memref<?xindex> -// CHECK: scf.yield %[[VAL_145]] : index +// CHECK: %[[VAL_134:.*]] = arith.cmpi ult, %[[VAL_129]], %[[VAL_127]] : index +// CHECK: %[[VAL_135:.*]] = scf.if %[[VAL_134]] -> (index) { +// CHECK: %[[VAL_136:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_129]]] : memref<?xindex> +// CHECK: scf.yield %[[VAL_136]] : index // CHECK: } else { -// CHECK: scf.yield %[[VAL_131]] : index +// CHECK: scf.yield %[[VAL_123]] : index // CHECK: } -// CHECK: %[[VAL_146:.*]] = arith.ori %[[VAL_143]], %[[VAL_132]] : i1 -// CHECK: %[[VAL_147:.*]] = arith.cmpi ult, %[[VAL_144]], %[[VAL_131]] : index -// CHECK: %[[VAL_148:.*]] = arith.select %[[VAL_147]], %[[VAL_144]], %[[VAL_131]] : index -// CHECK: scf.yield %[[VAL_148]], %[[VAL_146]] : index, i1 +// CHECK: %[[VAL_137:.*]] = arith.ori %[[VAL_134]], %[[VAL_124]] : i1 +// CHECK: %[[VAL_138:.*]] = arith.cmpi ult, %[[VAL_135]], %[[VAL_123]] : index +// CHECK: %[[VAL_139:.*]] = arith.select %[[VAL_138]], %[[VAL_135]], %[[VAL_123]] : index +// CHECK: scf.yield %[[VAL_139]], %[[VAL_137]] : index, i1 // CHECK: } -// 
CHECK: %[[VAL_149:.*]] = arith.addi %[[VAL_150:.*]]#0, %[[VAL_7]] : index -// CHECK: %[[VAL_151:.*]] = arith.addi %[[VAL_150]]#0, %[[VAL_3]] : index -// CHECK: %[[VAL_152:.*]] = arith.cmpi uge, %[[VAL_149]], %[[VAL_6]] : index -// CHECK: %[[VAL_153:.*]] = arith.select %[[VAL_152]], %[[VAL_151]], %[[VAL_10]] : index -// CHECK: scf.yield %[[VAL_150]]#0, %[[VAL_150]]#1, %[[VAL_153]] : index, i1, index +// CHECK: %[[VAL_140:.*]] = arith.addi %[[VAL_141:.*]]#0, %[[VAL_7]] : index +// CHECK: %[[VAL_142:.*]] = arith.addi %[[VAL_141]]#0, %[[VAL_3]] : index +// CHECK: %[[VAL_143:.*]] = arith.cmpi uge, %[[VAL_140]], %[[VAL_6]] : index +// CHECK: %[[VAL_144:.*]] = arith.select %[[VAL_143]], %[[VAL_142]], %[[VAL_8]] : index +// CHECK: scf.yield %[[VAL_141]]#0, %[[VAL_141]]#1, %[[VAL_144]] : index, i1, index // CHECK: } -// CHECK: %[[VAL_154:.*]] = arith.addi %[[VAL_78]], %[[VAL_7]] : index -// CHECK: %[[VAL_155:.*]] = arith.cmpi ugt, %[[VAL_156:.*]]#2, %[[VAL_154]] : index -// CHECK: %[[VAL_157:.*]] = arith.select %[[VAL_155]], %[[VAL_156]]#2, %[[VAL_154]] : index -// CHECK: %[[VAL_158:.*]] = arith.addi %[[VAL_157]], %[[VAL_6]] : index -// CHECK: %[[VAL_159:.*]] = arith.cmpi ule, %[[VAL_158]], %[[VAL_5]] : index -// CHECK: %[[VAL_160:.*]] = arith.andi %[[VAL_156]]#1, %[[VAL_159]] : i1 -// CHECK: scf.yield %[[VAL_160]], %[[VAL_156]]#0, %[[VAL_157]], %[[VAL_123]] : i1, index, index, tensor<6x6xi32, #sparse> +// CHECK: %[[VAL_145:.*]] = arith.addi %[[VAL_75]], %[[VAL_7]] : index +// CHECK: %[[VAL_146:.*]] = arith.cmpi ugt, %[[VAL_147:.*]]#2, %[[VAL_145]] : index +// CHECK: %[[VAL_148:.*]] = arith.select %[[VAL_146]], %[[VAL_147]]#2, %[[VAL_145]] : index +// CHECK: %[[VAL_149:.*]] = arith.addi %[[VAL_148]], %[[VAL_6]] : index +// CHECK: %[[VAL_150:.*]] = arith.cmpi ule, %[[VAL_149]], %[[VAL_5]] : index +// CHECK: %[[VAL_151:.*]] = arith.andi %[[VAL_147]]#1, %[[VAL_150]] : i1 +// CHECK: scf.yield %[[VAL_151]], %[[VAL_147]]#0, %[[VAL_148]], %[[VAL_115]] : i1, index, index, 
tensor<6x6xi32, #sparse> // CHECK: } -// CHECK: memref.store %[[VAL_10]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<5xindex> -// CHECK: %[[VAL_161:.*]] = arith.cmpi ugt, %[[VAL_33]], %[[VAL_34]] : index -// CHECK: %[[VAL_162:.*]]:3 = scf.if %[[VAL_161]] -> (index, i1, index) { -// CHECK: %[[VAL_163:.*]] = arith.addi %[[VAL_34]], %[[VAL_7]] : index -// CHECK: scf.yield %[[VAL_33]], %[[VAL_2]], %[[VAL_163]] : index, i1, index +// CHECK: %[[VAL_152:.*]] = arith.cmpi ugt, %[[VAL_31]], %[[VAL_32]] : index +// CHECK: %[[VAL_153:.*]]:3 = scf.if %[[VAL_152]] -> (index, i1, index) { +// CHECK: %[[VAL_154:.*]] = arith.addi %[[VAL_32]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_31]], %[[VAL_2]], %[[VAL_154]] : index, i1, index // CHECK: } else { -// CHECK: %[[VAL_164:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_9]]] : memref<5xindex> -// CHECK: %[[VAL_165:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_6]]] : memref<5xindex> -// CHECK: %[[VAL_166:.*]] = arith.cmpi ult, %[[VAL_164]], %[[VAL_165]] : index -// CHECK: %[[VAL_167:.*]] = scf.if %[[VAL_166]] -> (index) { -// CHECK: %[[VAL_168:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_164]]] : memref<?xindex> -// CHECK: %[[VAL_169:.*]] = arith.cmpi eq, %[[VAL_168]], %[[VAL_33]] : index -// CHECK: %[[VAL_170:.*]] = scf.if %[[VAL_169]] -> (index) { -// CHECK: %[[VAL_171:.*]] = arith.addi %[[VAL_164]], %[[VAL_7]] : index -// CHECK: memref.store %[[VAL_171]], %[[VAL_20]]{{\[}}%[[VAL_9]]] : memref<5xindex> -// CHECK: scf.yield %[[VAL_171]] : index +// CHECK: %[[VAL_155:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_8]]] : memref<3xindex> +// CHECK: %[[VAL_156:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<3xindex> +// CHECK: %[[VAL_157:.*]] = arith.cmpi ult, %[[VAL_155]], %[[VAL_156]] : index +// CHECK: %[[VAL_158:.*]] = scf.if %[[VAL_157]] -> (index) { +// CHECK: %[[VAL_159:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_155]]] : memref<?xindex> +// CHECK: %[[VAL_160:.*]] = arith.cmpi eq, %[[VAL_159]], %[[VAL_31]] : index +// CHECK: 
%[[VAL_161:.*]] = scf.if %[[VAL_160]] -> (index) { +// CHECK: %[[VAL_162:.*]] = arith.addi %[[VAL_155]], %[[VAL_7]] : index +// CHECK: memref.store %[[VAL_162]], %[[VAL_18]]{{\[}}%[[VAL_8]]] : memref<3xindex> +// CHECK: scf.yield %[[VAL_162]] : index // CHECK: } else { -// CHECK: scf.yield %[[VAL_164]] : index +// CHECK: scf.yield %[[VAL_155]] : index // CHECK: } -// CHECK: scf.yield %[[VAL_170]] : index +// CHECK: scf.yield %[[VAL_161]] : index // CHECK: } else { -// CHECK: scf.yield %[[VAL_164]] : index +// CHECK: scf.yield %[[VAL_155]] : index // CHECK: } -// CHECK: %[[VAL_172:.*]] = arith.cmpi ult, %[[VAL_167]], %[[VAL_165]] : index -// CHECK: %[[VAL_173:.*]] = scf.if %[[VAL_172]] -> (index) { -// CHECK: %[[VAL_174:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_167]]] : memref<?xindex> -// CHECK: scf.yield %[[VAL_174]] : index +// CHECK: %[[VAL_163:.*]] = arith.cmpi ult, %[[VAL_158]], %[[VAL_156]] : index +// CHECK: %[[VAL_164:.*]] = scf.if %[[VAL_163]] -> (index) { +// CHECK: %[[VAL_165:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_158]]] : memref<?xindex> +// CHECK: scf.yield %[[VAL_165]] : index // CHECK: } else { // CHECK: scf.yield %[[VAL_5]] : index // CHECK: } -// CHECK: %[[VAL_175:.*]] = arith.cmpi ult, %[[VAL_173]], %[[VAL_5]] : index -// CHECK: %[[VAL_176:.*]] = arith.select %[[VAL_175]], %[[VAL_173]], %[[VAL_5]] : index -// CHECK: %[[VAL_177:.*]] = arith.addi %[[VAL_176]], %[[VAL_7]] : index -// CHECK: %[[VAL_178:.*]] = arith.addi %[[VAL_176]], %[[VAL_3]] : index -// CHECK: %[[VAL_179:.*]] = arith.cmpi uge, %[[VAL_177]], %[[VAL_6]] : index -// CHECK: %[[VAL_180:.*]] = arith.select %[[VAL_179]], %[[VAL_178]], %[[VAL_10]] : index -// CHECK: scf.yield %[[VAL_176]], %[[VAL_172]], %[[VAL_180]] : index, i1, index +// CHECK: %[[VAL_166:.*]] = arith.cmpi ult, %[[VAL_164]], %[[VAL_5]] : index +// CHECK: %[[VAL_167:.*]] = arith.select %[[VAL_166]], %[[VAL_164]], %[[VAL_5]] : index +// CHECK: %[[VAL_168:.*]] = arith.addi %[[VAL_167]], %[[VAL_7]] : index +// CHECK: 
%[[VAL_169:.*]] = arith.addi %[[VAL_167]], %[[VAL_3]] : index +// CHECK: %[[VAL_170:.*]] = arith.cmpi uge, %[[VAL_168]], %[[VAL_6]] : index +// CHECK: %[[VAL_171:.*]] = arith.select %[[VAL_170]], %[[VAL_169]], %[[VAL_8]] : index +// CHECK: scf.yield %[[VAL_167]], %[[VAL_163]], %[[VAL_171]] : index, i1, index // CHECK: } -// CHECK: %[[VAL_181:.*]] = arith.addi %[[VAL_34]], %[[VAL_7]] : index -// CHECK: %[[VAL_182:.*]] = arith.cmpi ugt, %[[VAL_183:.*]]#2, %[[VAL_181]] : index -// CHECK: %[[VAL_184:.*]] = arith.select %[[VAL_182]], %[[VAL_183]]#2, %[[VAL_181]] : index -// CHECK: %[[VAL_185:.*]] = arith.addi %[[VAL_184]], %[[VAL_6]] : index -// CHECK: %[[VAL_186:.*]] = arith.cmpi ule, %[[VAL_185]], %[[VAL_5]] : index -// CHECK: %[[VAL_187:.*]] = arith.andi %[[VAL_183]]#1, %[[VAL_186]] : i1 -// CHECK: scf.yield %[[VAL_187]], %[[VAL_183]]#0, %[[VAL_184]], %[[VAL_188:.*]]#2 : i1, index, index, tensor<6x6xi32, #sparse> +// CHECK: %[[VAL_172:.*]] = arith.addi %[[VAL_32]], %[[VAL_7]] : index +// CHECK: %[[VAL_173:.*]] = arith.cmpi ugt, %[[VAL_174:.*]]#2, %[[VAL_172]] : index +// CHECK: %[[VAL_175:.*]] = arith.select %[[VAL_173]], %[[VAL_174]]#2, %[[VAL_172]] : index +// CHECK: %[[VAL_176:.*]] = arith.addi %[[VAL_175]], %[[VAL_6]] : index +// CHECK: %[[VAL_177:.*]] = arith.cmpi ule, %[[VAL_176]], %[[VAL_5]] : index +// CHECK: %[[VAL_178:.*]] = arith.andi %[[VAL_174]]#1, %[[VAL_177]] : i1 +// CHECK: scf.yield %[[VAL_178]], %[[VAL_174]]#0, %[[VAL_175]], %[[VAL_179:.*]]#2 : i1, index, index, tensor<6x6xi32, #sparse> // CHECK: } -// CHECK: %[[VAL_189:.*]] = sparse_tensor.load %[[VAL_190:.*]]#2 hasInserts : tensor<6x6xi32, #sparse> -// CHECK: return %[[VAL_189]] : tensor<6x6xi32, #sparse> +// CHECK: %[[VAL_180:.*]] = sparse_tensor.load %[[VAL_181:.*]]#2 hasInserts : tensor<6x6xi32, #sparse> +// CHECK: return %[[VAL_180]] : tensor<6x6xi32, #sparse> // CHECK: } func.func @conv2d_all_sparse_CSR(%arg0: tensor<8x8xi32, #DCSR>, %arg1: tensor<3x3xi32>) -> tensor<6x6xi32, #DCSR> { |