[SROA] `isVectorPromotionViable()`: memory intrinsics operate on vectors of bytes (take 2)

This is a recommit of cf624b23bc5d5a6161706d1663def49380ff816a, which was reverted in 5cfc22cafe3f2465e0bb324f8daba82ffcabd0df, because the cut-off on the number of vector elements was not low enough, and it triggered both SDAG SDNode operand number assertions, and caused compile time explosions in some cases. Let's try with something really *REALLY* conservative first, just to get somewhere, and try to bump it (to 64/128) later. FIXME: should this respect TTI reg width * num vec regs? Original commit message: Now, there's a big caveat here - these bytes are abstract bytes, not the i8 we have in LLVM, so strictly speaking this is not exactly legal, see e.g. https://github.com/AliveToolkit/alive2/issues/860 ^ the "bytes" "could" have been a pointer, and loading it as an integer inserts an implicit ptrtoint. But at the same time, InstCombine's `InstCombinerImpl::SimplifyAnyMemTransfer()` would expand a memtransfer of 1/2/4/8 bytes into integer-typed load+store, so this isn't exactly a new problem. Note that in memory, poison is byte-wise, so we really can't widen elements, but SROA seems to be inconsistent here. Fixes #59116.
author: Roman Lebedev <lebedev.ri@gmail.com> 2022-11-26 17:52:58 +0300
committer: Roman Lebedev <lebedev.ri@gmail.com> 2022-11-26 23:19:15 +0300
commit: 3c4d2a03968ccf5889bacffe02d6fa2443b0260f (patch)
tree: 7cbaa275940093ab016cee83570163a662e66e69 /llvm/lib
parent: a677afd3c132dd024fdf81c1bb0b256176895112 (diff)
download: llvm-3c4d2a03968ccf5889bacffe02d6fa2443b0260f.zip
llvm-3c4d2a03968ccf5889bacffe02d6fa2443b0260f.tar.gz
llvm-3c4d2a03968ccf5889bacffe02d6fa2443b0260f.tar.bz2
1 files changed, 35 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 6dcdd63..a2d7dc2 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1806,8 +1806,10 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
                       ? Ty->getElementType()
                       : FixedVectorType::get(Ty->getElementType(), NumElements);
 
-  Type *SplitIntTy =
-      Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
+  Type *SplitIntTy = nullptr;
+  if (uint64_t Bitwidth = NumElements * ElementSize * 8;
+      Bitwidth <= IntegerType::MAX_INT_BITS)
+    SplitIntTy = Type::getIntNTy(Ty->getContext(), Bitwidth);
 
   Use *U = S.getUse();
 
@@ -1826,7 +1828,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
     // Disable vector promotion when there are loads or stores of an FCA.
     if (LTy->isStructTy())
       return false;
-    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
+    if (SplitIntTy &&
+        (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) {
       assert(LTy->isIntegerTy());
       LTy = SplitIntTy;
     }
@@ -1839,7 +1842,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
     // Disable vector promotion when there are loads or stores of an FCA.
     if (STy->isStructTy())
       return false;
-    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
+    if (SplitIntTy &&
+        (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) {
       assert(STy->isIntegerTy());
       STy = SplitIntTy;
     }
@@ -1889,7 +1893,8 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
 /// SSA value. We only can ensure this for a limited set of operations, and we
 /// don't want to do the rewrites unless we are confident that the result will
 /// be promotable, so we have an early test here.
-static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
+static VectorType *isVectorPromotionViable(Partition &P, LLVMContext &Ctx,
+                                           const DataLayout &DL) {
   // Collect the candidate types for vector-based promotion. Also track whether
   // we have different element types.
   SmallVector<VectorType *, 4> CandidateTys;
@@ -1926,6 +1931,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
       }
     }
   };
+  bool SeenMemTransferInst = false;
   // Consider any loads or stores that are the exact size of the slice.
   for (const Slice &S : P)
     if (S.beginOffset() == P.beginOffset() &&
@@ -1934,8 +1940,29 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
         CheckCandidateType(LI->getType());
       else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
         CheckCandidateType(SI->getValueOperand()->getType());
+      else if (isa<MemTransferInst>(S.getUse()->getUser()))
+        SeenMemTransferInst = true;
     }
 
+  // If we have seen mem transfer intrinsic,
+  // and the partition is small-enough,
+  // enqueue appropriate byte vector.
+  //
+  // The "small-enough" threshold is somewhat arbitrary,
+  // and is mostly dictated by compile-time concerns,
+  // but, at the same time, SDAG SDNode can't handle
+  // more then 65535 operands, so we should not
+  // produce vectors with more than ~32768 elements.
+  //
+  // Perhaps, we should also take into account the TTI:
+  //     `getNumberOfRegisters() * getRegisterBitWidth() / 8` ?
+  //
+  // FIXME: byte type is sticky. If we had any op with byte-typed elements,
+  //        then we should choose that type.
+  if (SeenMemTransferInst && P.size() <= 32)
+    CheckCandidateType(
+        FixedVectorType::get(IntegerType::getInt8Ty(Ctx), P.size()));
+
   // If we didn't find a vector type, nothing to do here.
   if (CandidateTys.empty())
     return nullptr;
@@ -1992,13 +2019,6 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
     CandidateTys.resize(1);
   }
 
-  // FIXME: hack. Do we have a named constant for this?
-  // SDAG SDNode can't have more than 65535 operands.
-  llvm::erase_if(CandidateTys, [](VectorType *VTy) {
-    return cast<FixedVectorType>(VTy)->getNumElements() >
-           std::numeric_limits<unsigned short>::max();
-  });
-
   for (VectorType *VTy : CandidateTys)
     if (checkVectorTypeForPromotion(P, VTy, DL))
       return VTy;
@@ -4323,8 +4343,9 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
 
   bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
 
-  VectorType *VecTy =
-      IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
+  VectorType *VecTy = IsIntegerPromotable
+                          ? nullptr
+                          : isVectorPromotionViable(P, AI.getContext(), DL);
   if (VecTy)
     SliceTy = VecTy;
author	Roman Lebedev <lebedev.ri@gmail.com>	2022-11-26 17:52:58 +0300
committer	Roman Lebedev <lebedev.ri@gmail.com>	2022-11-26 23:19:15 +0300
commit	3c4d2a03968ccf5889bacffe02d6fa2443b0260f (patch)
tree	7cbaa275940093ab016cee83570163a662e66e69 /llvm/lib
parent	a677afd3c132dd024fdf81c1bb0b256176895112 (diff)
download	llvm-3c4d2a03968ccf5889bacffe02d6fa2443b0260f.zip llvm-3c4d2a03968ccf5889bacffe02d6fa2443b0260f.tar.gz llvm-3c4d2a03968ccf5889bacffe02d6fa2443b0260f.tar.bz2