diff options
Diffstat (limited to 'llvm/lib/Transforms')
19 files changed, 196 insertions, 139 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 7c78eb3..444b390 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -396,9 +396,8 @@ class CHR { } // end anonymous namespace -static inline -raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, - const CHRStats &Stats) { +[[maybe_unused]] static inline raw_ostream &operator<<(raw_ostream &OS, + const CHRStats &Stats) { Stats.print(OS); return OS; } @@ -425,8 +424,8 @@ static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) { return PSI.isFunctionEntryHot(&F); } -static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, - CHRStats *Stats) { +[[maybe_unused]] static void dumpIR(Function &F, const char *Label, + CHRStats *Stats) { StringRef FuncName = F.getName(); StringRef ModuleName = F.getParent()->getName(); (void)(FuncName); // Unused in release build. @@ -1622,7 +1621,7 @@ static void insertTrivialPHIs(CHRScope *Scope, } // Assert that all the CHR regions of the scope have a biased branch or select. -static void LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { #ifndef NDEBUG auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { @@ -1644,8 +1643,9 @@ assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { // Assert that all the condition values of the biased branches and selects have // been hoisted to the pre-entry block or outside of the scope. -static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( - CHRScope *Scope, BasicBlock *PreEntryBlock) { +[[maybe_unused]] static void +assertBranchOrSelectConditionHoisted(CHRScope *Scope, + BasicBlock *PreEntryBlock) { CHR_DEBUG(dbgs() << "Biased regions condition values \n"); for (RegInfo &RI : Scope->CHRRegions) { Region *R = RI.R; @@ -2007,8 +2007,8 @@ void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { } } -static void LLVM_ATTRIBUTE_UNUSED -dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { +[[maybe_unused]] static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, + const char *Label) { dbgs() << Label << " " << Scopes.size() << "\n"; for (CHRScope *Scope : Scopes) { dbgs() << *Scope << "\n"; diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 09db464..386e48f 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -326,8 +326,7 @@ const unsigned BBState::OverflowOccurredValue = 0xffffffff; namespace llvm { -raw_ostream &operator<<(raw_ostream &OS, - BBState &BBState) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, BBState &BBState); } // end namespace llvm diff --git a/llvm/lib/Transforms/ObjCARC/PtrState.h b/llvm/lib/Transforms/ObjCARC/PtrState.h index 232db2b..5cc4212 100644 --- a/llvm/lib/Transforms/ObjCARC/PtrState.h +++ b/llvm/lib/Transforms/ObjCARC/PtrState.h @@ -47,8 +47,7 @@ enum Sequence { S_MovableRelease ///< objc_release(x), !clang.imprecise_release. }; -raw_ostream &operator<<(raw_ostream &OS, - const Sequence S) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const Sequence S); /// Unidirectional information about either a /// retain-decrement-use-release sequence or release-use-decrement-retain diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 7ad710d..6141b6d 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -77,6 +77,7 @@ #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -805,9 +806,8 @@ tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, return nullptr; } -namespace { // Returns true if \p I is an intrinsic that does not read or write memory. -bool isNoopIntrinsic(Instruction *I) { +static bool isNoopIntrinsic(Instruction *I) { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: @@ -828,7 +828,7 @@ bool isNoopIntrinsic(Instruction *I) { } // Check if we can ignore \p D for DSE. -bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { +static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { Instruction *DI = D->getMemoryInst(); // Calls that only access inaccessible memory cannot read or write any memory // locations we consider for elimination. @@ -856,6 +856,8 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { return false; } +namespace { + // A memory location wrapper that represents a MemoryLocation, `MemLoc`, // defined by `MemDef`. struct MemoryLocationWrapper { @@ -889,23 +891,25 @@ struct MemoryDefWrapper { SmallVector<MemoryLocationWrapper, 1> DefinedLocations; }; -bool hasInitializesAttr(Instruction *I) { - CallBase *CB = dyn_cast<CallBase>(I); - return CB && CB->getArgOperandWithAttribute(Attribute::Initializes); -} - struct ArgumentInitInfo { unsigned Idx; bool IsDeadOrInvisibleOnUnwind; ConstantRangeList Inits; }; +} // namespace + +static bool hasInitializesAttr(Instruction *I) { + CallBase *CB = dyn_cast<CallBase>(I); + return CB && CB->getArgOperandWithAttribute(Attribute::Initializes); +} // Return the intersected range list of the initializes attributes of "Args". // "Args" are call arguments that alias to each other. // If any argument in "Args" doesn't have dead_on_unwind attr and // "CallHasNoUnwindAttr" is false, return empty. -ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, - bool CallHasNoUnwindAttr) { +static ConstantRangeList +getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, + bool CallHasNoUnwindAttr) { if (Args.empty()) return {}; @@ -925,6 +929,8 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, return IntersectedIntervals; } +namespace { + struct DSEState { Function &F; AliasAnalysis &AA; @@ -2328,10 +2334,11 @@ struct DSEState { // change state: whether make any change. bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper); }; +} // namespace // Return true if "Arg" is function local and isn't captured before "CB". -bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, - EarliestEscapeAnalysis &EA) { +static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, + EarliestEscapeAnalysis &EA) { const Value *UnderlyingObj = getUnderlyingObject(Arg); return isIdentifiedFunctionLocal(UnderlyingObj) && capturesNothing( @@ -2627,7 +2634,6 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, return MadeChange; } -} // end anonymous namespace //===----------------------------------------------------------------------===// // DSE Pass @@ -2728,8 +2734,6 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, false) -namespace llvm { -LLVM_ABI FunctionPass *createDeadStoreEliminationPass() { +LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSELegacyPass(); } -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp index 1c88532..b9534def 100644 --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -73,24 +73,17 @@ #include <utility> using namespace llvm; +using namespace llvm::GVNExpression; #define DEBUG_TYPE "gvn-sink" STATISTIC(NumRemoved, "Number of instructions removed"); -namespace llvm { -namespace GVNExpression { - LLVM_DUMP_METHOD void Expression::dump() const { print(dbgs()); dbgs() << "\n"; } -} // end namespace GVNExpression -} // end namespace llvm - -namespace { - static bool isMemoryInst(const Instruction *I) { return isa<LoadInst>(I) || isa<StoreInst>(I) || (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) || @@ -99,6 +92,8 @@ static bool isMemoryInst(const Instruction *I) { //===----------------------------------------------------------------------===// +namespace { + /// Candidate solution for sinking. There may be different ways to /// sink instructions, differing in the number of instructions sunk, /// the number of predecessors sunk from and the number of PHIs @@ -125,14 +120,6 @@ struct SinkingInstructionCandidate { } }; -#ifndef NDEBUG -raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) { - OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks - << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; - return OS; -} -#endif - //===----------------------------------------------------------------------===// /// Describes a PHI node that may or may not exist. These track the PHIs @@ -256,8 +243,18 @@ public: return Values == Other.Values && Blocks == Other.Blocks; } }; +} // namespace -template <typename ModelledPHI> struct DenseMapInfo { +#ifndef NDEBUG +static raw_ostream &operator<<(raw_ostream &OS, + const SinkingInstructionCandidate &C) { + OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks + << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; + return OS; +} +#endif + +template <> struct llvm::DenseMapInfo<ModelledPHI> { static inline ModelledPHI &getEmptyKey() { static ModelledPHI Dummy = ModelledPHI::createDummy(0); return Dummy; @@ -275,7 +272,9 @@ template <typename ModelledPHI> struct DenseMapInfo { } }; -using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>; +using ModelledPHISet = DenseSet<ModelledPHI>; + +namespace { //===----------------------------------------------------------------------===// // ValueTable @@ -290,7 +289,7 @@ using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>; /// /// This class also contains fields for discriminators used when determining /// equivalence of instructions with sideeffects. -class InstructionUseExpr : public GVNExpression::BasicExpression { +class InstructionUseExpr : public BasicExpression { unsigned MemoryUseOrder = -1; bool Volatile = false; ArrayRef<int> ShuffleMask; @@ -298,7 +297,7 @@ class InstructionUseExpr : public GVNExpression::BasicExpression { public: InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R, BumpPtrAllocator &A) - : GVNExpression::BasicExpression(I->getNumUses()) { + : BasicExpression(I->getNumUses()) { allocateOperands(R, A); setOpcode(I->getOpcode()); setType(I->getType()); @@ -315,8 +314,8 @@ public: void setVolatile(bool V) { Volatile = V; } hash_code getHashValue() const override { - return hash_combine(GVNExpression::BasicExpression::getHashValue(), - MemoryUseOrder, Volatile, ShuffleMask); + return hash_combine(BasicExpression::getHashValue(), MemoryUseOrder, + Volatile, ShuffleMask); } template <typename Function> hash_code getHashValue(Function MapFn) { @@ -332,7 +331,7 @@ using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>; class ValueTable { DenseMap<Value *, uint32_t> ValueNumbering; - DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering; + DenseMap<Expression *, uint32_t> ExpressionNumbering; DenseMap<size_t, uint32_t> HashNumbering; BumpPtrAllocator Allocator; ArrayRecycler<Value *> Recycler; @@ -594,6 +593,7 @@ private: } } }; +} // namespace std::optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI, @@ -851,8 +851,6 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, NumRemoved += Insts.size() - 1; } -} // end anonymous namespace - PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { GVNSink G; if (!G.run(F)) diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 578fec7..a692009 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -551,12 +551,10 @@ public: } /// Support comparison with a single offset to allow binary searches. - friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS, - uint64_t RHSOffset) { + [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) { return LHS.beginOffset() < RHSOffset; } - friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, - const Slice &RHS) { + [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) { return LHSOffset < RHS.beginOffset(); } diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index b18acea..4fe736a 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -1106,7 +1106,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } Phi.replaceAllUsesWith(RdxResult); - continue; } } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b96d29e..62a81ba 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8240,14 +8240,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // the vector loop or when not folding the tail. In the later case, we know // that the canonical induction increment will not overflow as the vector trip // count is >= increment and a multiple of the increment. + VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion(); bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None; if (!HasNUW) { - auto *IVInc = Plan->getVectorLoopRegion() - ->getExitingBasicBlock() - ->getTerminator() - ->getOperand(0); - assert(match(IVInc, m_VPInstruction<Instruction::Add>( - m_Specific(Plan->getCanonicalIV()), m_VPValue())) && + auto *IVInc = + LoopRegion->getExitingBasicBlock()->getTerminator()->getOperand(0); + assert(match(IVInc, + m_VPInstruction<Instruction::Add>( + m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) && "Did not find the canonical IV increment"); cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags(); } @@ -8293,7 +8293,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. - VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion(); VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( HeaderVPBB); @@ -8377,8 +8376,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( for (VPValue *Old : Old2New.keys()) Old->getDefiningRecipe()->eraseFromParent(); - assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) && - !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() && + assert(isa<VPRegionBlock>(LoopRegion) && + !LoopRegion->getEntryBasicBlock()->empty() && "entry block must be set to a VPRegionBlock having a non-empty entry " "VPBasicBlock"); @@ -9326,8 +9325,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { if (ResumePhiIter == MainScalarPH->phis().end()) { VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); ResumePhi = ScalarPHBuilder.createScalarPhi( - {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {}, - "vec.epilog.resume.val"); + {VectorTC, + MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()}, + {}, "vec.epilog.resume.val"); } else { ResumePhi = cast<VPPhi>(&*ResumePhiIter); if (MainScalarPH->begin() == MainScalarPH->end()) @@ -9354,7 +9354,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); Header->setName("vec.epilog.vector.body"); - VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV(); + VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV(); // When vectorizing the epilogue loop, the canonical induction needs to be // adjusted by the value after the main vector loop. Find the resume value // created during execution of the main VPlan. It must be the first phi in the diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 23f5623..0e0b042 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1012,6 +1012,8 @@ public: // part if scalar. In the latter case, the recipe will be removed during // unrolling. ExtractLastElement, + // Extracts the last lane for each part from its operand. + ExtractLastLanePerPart, // Extracts the second-to-last lane from its operand or the second-to-last // part if it is scalar. In the latter case, the recipe will be removed // during unrolling. @@ -4058,6 +4060,19 @@ public: /// Remove the current region from its VPlan, connecting its predecessor to /// its entry, and its exiting block to its successor. void dissolveToCFGLoop(); + + /// Returns the canonical induction recipe of the region. + VPCanonicalIVPHIRecipe *getCanonicalIV() { + VPBasicBlock *EntryVPBB = getEntryBasicBlock(); + if (EntryVPBB->empty()) { + // VPlan native path. TODO: Unify both code paths. + EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor()); + } + return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin()); + } + const VPCanonicalIVPHIRecipe *getCanonicalIV() const { + return const_cast<VPRegionBlock *>(this)->getCanonicalIV(); + } }; /// VPlan models a candidate for vectorization, encoding various decisions take @@ -4252,12 +4267,14 @@ public: BackedgeTakenCount = new VPValue(); return BackedgeTakenCount; } + VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; } /// The vector trip count. VPValue &getVectorTripCount() { return VectorTripCount; } /// Returns the VF of the vector loop region. VPValue &getVF() { return VF; }; + const VPValue &getVF() const { return VF; }; /// Returns VF * UF of the vector loop region. VPValue &getVFxUF() { return VFxUF; } @@ -4369,16 +4386,6 @@ public: LLVM_DUMP_METHOD void dump() const; #endif - /// Returns the canonical induction recipe of the vector loop. - VPCanonicalIVPHIRecipe *getCanonicalIV() { - VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock(); - if (EntryVPBB->empty()) { - // VPlan native path. - EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor()); - } - return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin()); - } - VPValue *getSCEVExpansion(const SCEV *S) const { return SCEVToExpansion.lookup(S); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 07bfe7a..f413c63 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -116,6 +116,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { case VPInstruction::FirstActiveLane: return Type::getIntNTy(Ctx, 64); case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractPenultimateElement: { Type *BaseTy = inferScalarType(R->getOperand(0)); if (auto *VecTy = dyn_cast<VectorType>(BaseTy)) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index c0147ce..332791a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -658,9 +658,11 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, } VPIRMetadata VPBranchWeights; - auto *Term = VPBuilder(CheckBlockVPBB) - .createNaryOp(VPInstruction::BranchOnCond, {CondVPV}, - Plan.getCanonicalIV()->getDebugLoc()); + auto *Term = + VPBuilder(CheckBlockVPBB) + .createNaryOp( + VPInstruction::BranchOnCond, {CondVPV}, + Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc()); if (AddBranchWeights) { MDBuilder MDB(Plan.getContext()); MDNode *BranchWeights = @@ -921,8 +923,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) { if (DerivedIV->getNumUsers() == 1 && DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) { - auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), - &Plan.getVectorTripCount()); + auto *NewSel = Builder.createSelect( + AnyNaN, LoopRegion->getCanonicalIV(), &Plan.getVectorTripCount()); DerivedIV->moveAfter(&*Builder.getInsertPoint()); DerivedIV->setOperand(1, NewSel); continue; @@ -935,7 +937,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { "FMaxNum/FMinNum reduction.\n"); return false; } - auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV); + auto *NewSel = + Builder.createSelect(AnyNaN, LoopRegion->getCanonicalIV(), VecV); ResumeR->setOperand(0, NewSel); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index b42b049..ff286f7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -372,6 +372,12 @@ m_ExtractLastElement(const Op0_t &Op0) { return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0); } +template <typename Op0_t> +inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t> +m_ExtractLastLanePerPart(const Op0_t &Op0) { + return m_VPInstruction<VPInstruction::ExtractLastLanePerPart>(Op0); +} + template <typename Op0_t, typename Op1_t, typename Op2_t> inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t> m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { @@ -394,6 +400,12 @@ m_AnyOf(const Op0_t &Op0) { return m_VPInstruction<VPInstruction::AnyOf>(Op0); } +template <typename Op0_t> +inline VPInstruction_match<VPInstruction::FirstActiveLane, Op0_t> +m_FirstActiveLane(const Op0_t &Op0) { + return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0); +} + template <unsigned Opcode, typename Op0_t> inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) { return AllRecipe_match<Opcode, Op0_t>(Op0); diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 0c27d53..fb17d5d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -168,7 +168,8 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) { // non-phi instructions. auto &Plan = *HeaderVPBB->getPlan(); - auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV()); + auto *IV = + new VPWidenCanonicalIVRecipe(HeaderVPBB->getParent()->getCanonicalIV()); Builder.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi()); Builder.insert(IV); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2368d18..775837f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -511,6 +511,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::ExplicitVectorLength: case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractPenultimateElement: case VPInstruction::FirstActiveLane: case VPInstruction::Not: @@ -878,9 +879,11 @@ Value *VPInstruction::generate(VPTransformState &State) { return ReducedPartRdx; } + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractLastElement: case VPInstruction::ExtractPenultimateElement: { - unsigned Offset = getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2; + unsigned Offset = + getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1; Value *Res; if (State.VF.isVector()) { assert(Offset <= State.VF.getKnownMinValue() && @@ -1166,6 +1169,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractLastElement || + getOpcode() == VPInstruction::ExtractLastLanePerPart || getOpcode() == VPInstruction::ExtractPenultimateElement || getOpcode() == Instruction::ExtractElement || getOpcode() == VPInstruction::ExtractLane || @@ -1229,6 +1233,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::ExtractLane: case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractPenultimateElement: case VPInstruction::ActiveLaneMask: case VPInstruction::FirstActiveLane: @@ -1376,6 +1381,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ExtractLastElement: O << "extract-last-element"; break; + case VPInstruction::ExtractLastLanePerPart: + O << "extract-last-lane-per-part"; + break; case VPInstruction::ExtractPenultimateElement: O << "extract-penultimate-element"; break; @@ -2344,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const { return false; auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue()); auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); - auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin()); + auto *CanIV = getParent()->getParent()->getCanonicalIV(); return StartC && StartC->isZero() && StepC && StepC->isOne() && getScalarType() == CanIV->getScalarType(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 40b7e8d..8d76b2d8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -501,7 +501,8 @@ static void removeRedundantInductionCasts(VPlan &Plan) { /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV /// recipe, if it exists. static void removeRedundantCanonicalIVs(VPlan &Plan) { - VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV(); VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; for (VPUser *U : CanonicalIV->users()) { WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U); @@ -512,7 +513,7 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) { if (!WidenNewIV) return; - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); for (VPRecipeBase &Phi : HeaderVPBB->phis()) { auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); @@ -582,8 +583,9 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, Instruction *TruncI, VPValue *StartV, VPValue *Step, DebugLoc DL, VPBuilder &Builder) { - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); - VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); + VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV(); VPSingleDefRecipe *BaseIV = Builder.createDerivedIV( Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx"); @@ -786,9 +788,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, ScalarEvolution &SE) { VPValue *Incoming, *Mask; if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>( - m_VPInstruction<VPInstruction::FirstActiveLane>( - m_VPValue(Mask)), - m_VPValue(Incoming)))) + m_FirstActiveLane(m_VPValue(Mask)), m_VPValue(Incoming)))) return nullptr; auto *WideIV = getOptimizableIVOf(Incoming, SE); @@ -800,8 +800,9 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, return nullptr; // Calculate the final index. - VPValue *EndValue = Plan.getCanonicalIV(); - auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + auto *CanonicalIV = LoopRegion->getCanonicalIV(); + Type *CanonicalIVType = CanonicalIV->getScalarType(); VPBuilder B(cast<VPBasicBlock>(PredVPBB)); DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc(); @@ -810,7 +811,8 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, Type *FirstActiveLaneType = TypeInfo.inferScalarType(FirstActiveLane); FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType, FirstActiveLaneType, DL); - EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL); + VPValue *EndValue = + B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL); // `getOptimizableIVOf()` always returns the pre-incremented IV, so if it // changed it means the exit is using the incremented value, so we need to @@ -1205,7 +1207,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { } // Look through ExtractLastElement (BuildVector ....). - if (match(&R, m_ExtractLastElement(m_BuildVector()))) { + if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()), + m_ExtractLastLanePerPart(m_BuildVector())))) { auto *BuildVector = cast<VPInstruction>(R.getOperand(0)); Def->replaceAllUsesWith( BuildVector->getOperand(BuildVector->getNumOperands() - 1)); @@ -1271,13 +1274,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } - if (match(Def, m_ExtractLastElement(m_Broadcast(m_VPValue(A))))) { + if (match(Def, + m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))), + m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) { Def->replaceAllUsesWith(A); return; } - if (match(Def, - m_VPInstruction<VPInstruction::ExtractLastElement>(m_VPValue(A))) && + if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)), + m_ExtractLastLanePerPart(m_VPValue(A)))) && ((isa<VPInstruction>(A) && vputils::isSingleScalar(A)) || (isa<VPReplicateRecipe>(A) && cast<VPReplicateRecipe>(A)->isSingleScalar())) && @@ -1285,6 +1290,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) { return Def->replaceAllUsesWith(A); } + + if (Plan->getUF() == 1 && + match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) { + return Def->replaceAllUsesWith( + Builder.createNaryOp(VPInstruction::ExtractLastElement, {A})); + } } void VPlanTransforms::simplifyRecipes(VPlan &Plan) { @@ -1322,8 +1333,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/); Clone->insertBefore(RepOrWidenR); - auto *Ext = new VPInstruction(VPInstruction::ExtractLastElement, - {Clone->getOperand(0)}); + unsigned ExtractOpc = + vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1)) + ? VPInstruction::ExtractLastElement + : VPInstruction::ExtractLastLanePerPart; + auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)}); Ext->insertBefore(Clone); Clone->setOperand(0, Ext); RepR->eraseFromParent(); @@ -1337,7 +1351,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) { return U->usesScalars(RepOrWidenR) || match(cast<VPRecipeBase>(U), - m_ExtractLastElement(m_VPValue())); + m_CombineOr(m_ExtractLastElement(m_VPValue()), + m_ExtractLastLanePerPart(m_VPValue()))); })) continue; @@ -1530,7 +1545,7 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan, return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE); }); - auto *CanIV = Plan.getCanonicalIV(); + auto *CanIV = Plan.getVectorLoopRegion()->getCanonicalIV(); if (!match(Cond, m_SpecificICmp(CmpInst::ICMP_EQ, m_Specific(CanIV->getBackedgeValue()), m_Specific(&Plan.getVectorTripCount())))) @@ -2319,7 +2334,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) { VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *EB = TopRegion->getExitingBasicBlock(); - auto *CanonicalIVPHI = Plan.getCanonicalIV(); + auto *CanonicalIVPHI = TopRegion->getCanonicalIV(); VPValue *StartV = CanonicalIVPHI->getStartValue(); auto *CanonicalIVIncrement = @@ -2358,7 +2373,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( // Create the active lane mask instruction in the VPlan preheader. VPValue *ALMMultiplier = Plan.getOrAddLiveIn( - ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); + ConstantInt::get(TopRegion->getCanonicalIV()->getScalarType(), 1)); auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC, ALMMultiplier}, DL, "active.lane.mask.entry"); @@ -2394,13 +2409,15 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( /// TODO: Introduce explicit recipe for header-mask instead of searching /// for the header-mask pattern manually. static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) { + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); SmallVector<VPValue *> WideCanonicalIVs; - auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(), - IsaPred<VPWidenCanonicalIVRecipe>); - assert(count_if(Plan.getCanonicalIV()->users(), + auto *FoundWidenCanonicalIVUser = find_if( + LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>); + assert(count_if(LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>) <= 1 && "Must have at most one VPWideCanonicalIVRecipe"); - if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) { + if (FoundWidenCanonicalIVUser != + LoopRegion->getCanonicalIV()->users().end()) { auto *WideCanonicalIV = cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser); WideCanonicalIVs.push_back(WideCanonicalIV); @@ -2408,7 +2425,7 @@ static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) { // Also include VPWidenIntOrFpInductionRecipes that represent a widened // version of the canonical induction. - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); for (VPRecipeBase &Phi : HeaderVPBB->phis()) { auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); if (WidenOriginalIV && WidenOriginalIV->isCanonical()) @@ -2441,8 +2458,9 @@ void VPlanTransforms::addActiveLaneMask( "DataAndControlFlowWithoutRuntimeCheck implies " "UseActiveLaneMaskForControlFlow"); - auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(), - IsaPred<VPWidenCanonicalIVRecipe>); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + auto *FoundWidenCanonicalIVUser = find_if( + LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>); assert(FoundWidenCanonicalIVUser && "Must have widened canonical IV when tail folding!"); VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan); @@ -2455,7 +2473,7 @@ void VPlanTransforms::addActiveLaneMask( } else { VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV); VPValue *ALMMultiplier = Plan.getOrAddLiveIn( - ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); + ConstantInt::get(LoopRegion->getCanonicalIV()->getScalarType(), 1)); LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask, {WideCanonicalIV, Plan.getTripCount(), ALMMultiplier}, @@ -2565,9 +2583,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { }); assert(all_of(Plan.getVFxUF().users(), - [&Plan](VPUser *U) { - return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()), - m_Specific(&Plan.getVFxUF()))) || + [&LoopRegion, &Plan](VPUser *U) { + return match(U, + m_c_Add(m_Specific(LoopRegion->getCanonicalIV()), + m_Specific(&Plan.getVFxUF()))) || isa<VPWidenPointerInductionRecipe>(U); }) && "Only users of VFxUF should be VPWidenPointerInductionRecipe and the " @@ -2722,9 +2741,10 @@ void VPlanTransforms::addExplicitVectorLength( VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) { if (Plan.hasScalarVFOnly()) return; - VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); - auto *CanonicalIVPHI = Plan.getCanonicalIV(); + auto *CanonicalIVPHI = LoopRegion->getCanonicalIV(); auto *CanIVTy = CanonicalIVPHI->getScalarType(); VPValue *StartV = CanonicalIVPHI->getStartValue(); @@ -4164,7 +4184,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, // Adjust induction to reflect that the transformed plan only processes one // original iteration. - auto *CanIV = Plan.getCanonicalIV(); + auto *CanIV = VectorLoop->getCanonicalIV(); auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue()); VPBuilder PHBuilder(Plan.getVectorPreheader()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 1c4adfc..5aeda3e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -69,7 +69,8 @@ class UnrollState { VPBasicBlock::iterator InsertPtForPhi); VPValue *getConstantVPV(unsigned Part) { - Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType(); + Type *CanIVIntTy = + Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType(); return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part)); } @@ -351,8 +352,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) { // Compute*Result which combine all parts to compute the final value. VPValue *Op1; if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) || - match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>( - m_VPValue(Op1))) || + match(&R, m_FirstActiveLane(m_VPValue(Op1))) || match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>( m_VPValue(), m_VPValue(), m_VPValue(Op1))) || match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>( diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 66748c5..8b1b0e5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -53,7 +53,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { return Expanded; } -bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) { +bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) { if (isa<VPActiveLaneMaskPHIRecipe>(V)) return true; @@ -67,12 +67,14 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) { if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One()))) return B == Plan.getTripCount() && - (match(A, m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()), m_One(), - m_Specific(&Plan.getVF()))) || + (match(A, + m_ScalarIVSteps( + m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()), + m_One(), m_Specific(&Plan.getVF()))) || IsWideCanonicalIV(A)); return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) && - B == Plan.getOrCreateBackedgeTakenCount(); + B == Plan.getBackedgeTakenCount(); } const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) { @@ -102,7 +104,8 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) { return all_of(R->operands(), isUniformAcrossVFsAndUFs); } - auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV(); + auto *CanonicalIV = + R->getParent()->getEnclosingLoopRegion()->getCanonicalIV(); // Canonical IV chain is uniform. if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue()) return true; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 0222b0a..cf95ac0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -90,7 +90,7 @@ inline bool isSingleScalar(const VPValue *VPV) { } /// Return true if \p V is a header mask in \p Plan. -bool isHeaderMask(const VPValue *V, VPlan &Plan); +bool isHeaderMask(const VPValue *V, const VPlan &Plan); /// Checks if \p V is uniform across all VF lanes and UF parts. It is considered /// as such if it is either loop invariant (defined outside the vector region) diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 5262af6..91734a1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -298,11 +298,16 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { return false; } } - if (const auto *EVL = dyn_cast<VPInstruction>(&R)) { - if (EVL->getOpcode() == VPInstruction::ExplicitVectorLength && - !verifyEVLRecipe(*EVL)) { - errs() << "EVL VPValue is not used correctly\n"; - return false; + if (const auto *VPI = dyn_cast<VPInstruction>(&R)) { + switch (VPI->getOpcode()) { + case VPInstruction::ExplicitVectorLength: + if (!verifyEVLRecipe(*VPI)) { + errs() << "EVL VPValue is not used correctly\n"; + return false; + } + break; + default: + break; } } } |