[LoopVectorize][NFC] Refactor widening decision logic
branch: users/skachkov-sc/widen-decision-refactor

author: Sergey Kachkov <sergey.kachkov@syntacore.com> 2023-11-22 17:24:08 +0300
committer: Sergey Kachkov <sergey.kachkov@syntacore.com> 2025-05-19 17:52:27 +0300
commit: 3a5b3c8279527e0d7a24e80756f50b4911062801 (patch)
tree: 4fe4f7c9574b701dde965675ca89442f3d9dd87f
parent: e9bea4167778c726ee0979454fbee2b7c81365fc (diff)
download: llvm-users/skachkov-sc/widen-decision-refactor.zip llvm-users/skachkov-sc/widen-decision-refactor.tar.gz llvm-users/skachkov-sc/widen-decision-refactor.tar.bz2
1 files changed, 23 insertions, 28 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b2d7c44..490d0af 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1308,9 +1308,10 @@ public:
   getDivRemSpeculationCost(Instruction *I,
                            ElementCount VF) const;
 
-  /// Returns true if \p I is a memory instruction with consecutive memory
-  /// access that can be widened.
-  bool memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
+  /// Returns widening decision (CM_Widen or CM_Widen_Reverse) if \p I is a
+  /// memory instruction with consecutive access that can be widened, or
+  /// CM_Unknown otherwise.
+  InstWidening memoryInstructionCanBeWidened(Instruction *I, ElementCount VF);
 
   /// Returns true if \p I is a memory instruction in an interleaved-group
   /// of memory accesses that can be vectorized with wide vector loads/stores
@@ -1574,7 +1575,8 @@ private:
 
   /// The cost computation for widening instruction \p I with consecutive
   /// memory access.
-  InstructionCost getConsecutiveMemOpCost(Instruction *I, ElementCount VF);
+  InstructionCost getConsecutiveMemOpCost(Instruction *I, ElementCount VF,
+                                          InstWidening Decision);
 
   /// The cost calculation for Load/Store instruction \p I with uniform pointer -
   /// Load: scalar load + broadcast.
@@ -3252,8 +3254,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
                           : TTI.isLegalMaskedStore(Ty, Alignment, AS);
 }
 
-bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
-    Instruction *I, ElementCount VF) {
+LoopVectorizationCostModel::InstWidening
+LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
+                                                          ElementCount VF) {
   // Get and ensure we have a valid memory instruction.
   assert((isa<LoadInst, StoreInst>(I)) && "Invalid memory instruction");
 
@@ -3261,21 +3264,22 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
   auto *ScalarTy = getLoadStoreType(I);
 
   // In order to be widened, the pointer should be consecutive, first of all.
-  if (!Legal->isConsecutivePtr(ScalarTy, Ptr))
-    return false;
+  auto Stride = Legal->isConsecutivePtr(ScalarTy, Ptr);
+  if (!Stride)
+    return CM_Unknown;
 
   // If the instruction is a store located in a predicated block, it will be
   // scalarized.
   if (isScalarWithPredication(I, VF))
-    return false;
+    return CM_Unknown;
 
   // If the instruction's allocated size doesn't equal it's type size, it
   // requires padding and will be scalarized.
   auto &DL = I->getDataLayout();
   if (hasIrregularType(ScalarTy, DL))
-    return false;
+    return CM_Unknown;
 
-  return true;
+  return Stride == 1 ? CM_Widen : CM_Widen_Reverse;
 }
 
 void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
@@ -5569,17 +5573,15 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   return Cost;
 }
 
-InstructionCost
-LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
-                                                    ElementCount VF) {
+InstructionCost LoopVectorizationCostModel::getConsecutiveMemOpCost(
+    Instruction *I, ElementCount VF, InstWidening Decision) {
   Type *ValTy = getLoadStoreType(I);
   auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
-  Value *Ptr = getLoadStorePointerOperand(I);
   unsigned AS = getLoadStoreAddressSpace(I);
-  int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr);
+  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
-  assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
-         "Stride should be 1 or -1 for consecutive memory access");
+  assert((Decision == CM_Widen || Decision == CM_Widen_Reverse) &&
+         "Expected widen decision.");
   const Align Alignment = getLoadStoreAlignment(I);
   InstructionCost Cost = 0;
   if (Legal->isMaskRequired(I)) {
@@ -5591,8 +5593,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
                                 CostKind, OpInfo, I);
   }
 
-  bool Reverse = ConsecutiveStride < 0;
-  if (Reverse)
+  if (Decision == CM_Widen_Reverse)
     Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, {},
                                CostKind, 0);
   return Cost;
@@ -5991,14 +5992,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
       }
 
       // We assume that widening is the best solution when possible.
-      if (memoryInstructionCanBeWidened(&I, VF)) {
-        InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
-        int ConsecutiveStride = Legal->isConsecutivePtr(
-            getLoadStoreType(&I), getLoadStorePointerOperand(&I));
-        assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
-               "Expected consecutive stride.");
-        InstWidening Decision =
-            ConsecutiveStride == 1 ? CM_Widen : CM_Widen_Reverse;
+      if (auto Decision = memoryInstructionCanBeWidened(&I, VF)) {
+        InstructionCost Cost = getConsecutiveMemOpCost(&I, VF, Decision);
         setWideningDecision(&I, VF, Decision, Cost);
         continue;
       }
author	Sergey Kachkov <sergey.kachkov@syntacore.com>	2023-11-22 17:24:08 +0300
committer	Sergey Kachkov <sergey.kachkov@syntacore.com>	2025-05-19 17:52:27 +0300
commit	3a5b3c8279527e0d7a24e80756f50b4911062801 (patch)
tree	4fe4f7c9574b701dde965675ca89442f3d9dd87f
parent	e9bea4167778c726ee0979454fbee2b7c81365fc (diff)
download	llvm-users/skachkov-sc/widen-decision-refactor.zip llvm-users/skachkov-sc/widen-decision-refactor.tar.gz llvm-users/skachkov-sc/widen-decision-refactor.tar.bz2