Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 38
1 file changed, 32 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1a279b6..001215a 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1318,6 +1318,11 @@ public:
   /// the loop, in which case some special-case heuristics may be used.
   bool AllFixupsOutsideLoop = true;
 
+  /// This records whether all of the fixups using this LSRUse are unconditional
+  /// within the loop, meaning they will be executed on every path to the loop
+  /// latch. This includes fixups before early exits.
+  bool AllFixupsUnconditional = true;
+
   /// RigidFormula is set to true to guarantee that this use will be associated
   /// with a single formula--the one that initially matched. Some SCEV
   /// expressions cannot be expanded. This allows LSR to consider the registers
@@ -1421,16 +1426,22 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
     if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
         TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
       const SCEV *Start;
-      const SCEVConstant *Step;
-      if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
+      const APInt *Step;
+      if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_scev_APInt(Step)))) {
         // If the step size matches the base offset, we could use pre-indexed
         // addressing.
-        if (((AMK & TTI::AMK_PreIndexed) && F.BaseOffset.isFixed() &&
-             Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
-            ((AMK & TTI::AMK_PostIndexed) && !isa<SCEVConstant>(Start) &&
-             SE->isLoopInvariant(Start, L)))
+        bool CanPreIndex = (AMK & TTI::AMK_PreIndexed) &&
+                           F.BaseOffset.isFixed() &&
+                           *Step == F.BaseOffset.getFixedValue();
+        bool CanPostIndex = (AMK & TTI::AMK_PostIndexed) &&
+                            !isa<SCEVConstant>(Start) &&
+                            SE->isLoopInvariant(Start, L);
+        // We can only pre or post index when the load/store is unconditional.
+        if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
           LoopCost = 0;
+      }
     }
+
     // If the loop counts down to zero and we'll be using a hardware loop then
     // the addrec will be combined into the hardware loop instruction.
     if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
@@ -1783,6 +1794,9 @@ void LSRUse::print(raw_ostream &OS) const {
   if (AllFixupsOutsideLoop)
     OS << ", all-fixups-outside-loop";
 
+  if (AllFixupsUnconditional)
+    OS << ", all-fixups-unconditional";
+
   if (WidestFixupType)
     OS << ", widest fixup type: " << *WidestFixupType;
 }
@@ -2213,6 +2227,7 @@ class LSRInstance {
   void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
   void CountRegisters(const Formula &F, size_t LUIdx);
   bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+  bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;
 
   void CollectLoopInvariantFixupsAndFormulae();
 
@@ -3607,6 +3622,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
     LF.PostIncLoops = TmpPostIncLoops;
     LF.Offset = Offset;
     LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+    LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
 
     // Create SCEV as Formula for calculating baseline cost
     if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
@@ -3680,6 +3696,14 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
   return true;
 }
 
+/// Test whether this fixup will be executed each time the corresponding IV
+/// increment instruction is executed.
+bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
+  // If the fixup block dominates the IV increment block then there is no path
+  // through the loop to the increment that doesn't pass through the fixup.
+  return DT.dominates(LF.UserInst->getParent(), IVIncInsertPos->getParent());
+}
+
 /// Check for other uses of loop-invariant values which we're tracking. These
 /// other uses will pin these values in registers, making them less profitable
 /// for elimination.
@@ -3803,6 +3827,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
         LF.OperandValToReplace = U;
         LF.Offset = Offset;
         LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+        LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
         if (!LU.WidestFixupType ||
             SE.getTypeSizeInBits(LU.WidestFixupType) <
             SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -4940,6 +4965,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
       LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');
 
       LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+      LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
 
       // Transfer the fixups of LU to LUThatHas.
      for (LSRFixup &Fixup : LU.Fixups) {
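
For context, here is a source-level sketch (hypothetical, not part of the patch) of the distinction the new AllFixupsUnconditional flag captures: a pre- or post-indexed access can absorb the pointer increment for free only when the access executes on every path to the loop latch.

// Hypothetical C++ illustration. In f_uncond the load of p[i] runs on every
// iteration, so its block dominates the IV increment and a pre/post-indexed
// load can fold the pointer bump. In f_cond the load is guarded, its block
// does not dominate the latch, AllFixupsUnconditional is false for that
// LSRUse, and LoopCost is no longer zeroed.
int f_uncond(const int *p, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += p[i]; // unconditional fixup: executed at each IV increment
  return sum;
}

int f_cond(const int *p, const bool *keep, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i)
    if (keep[i])   // p[i] is loaded only on some iterations, so the access
      sum += p[i]; // cannot soundly carry the induction-variable update
  return sum;
}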

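The doc comment's note that fixups before early exits still count as unconditional follows directly from the dominance test: a block ahead of a break still dominates the latch, since every path that reaches the IV increment passes through it. Another hypothetical sketch:

// Hypothetical early-exit case: the load of p[i] sits before the break in a
// block that dominates the loop latch, so IsFixupExecutedEachIncrement
// returns true and the fixup still counts as unconditional, even though the
// loop as a whole may exit early.
int f_early_exit(const int *p, int n) {
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    int v = p[i]; // executed on every path that reaches the latch
    if (v < 0)
      break;      // early exit occurs after the fixup
    sum += v;
  }
  return sum;
}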