diff options
Diffstat (limited to 'llvm/lib/Transforms')
37 files changed, 362 insertions, 266 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp index f166fef..cf7e450 100644 --- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp @@ -153,26 +153,23 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine(); bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe); if (IsCallerPresplitCoroutine && HasAttr) { - BranchProbability MinBranchProbability( - static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution), - MinBlockCounterExecution); - auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller); - auto Prob = BranchProbability::getBranchProbability( - BFI.getBlockFreq(CB->getParent()).getFrequency(), - BFI.getEntryFreq().getFrequency()); + auto BlockFreq = BFI.getBlockFreq(CB->getParent()).getFrequency(); + auto EntryFreq = BFI.getEntryFreq().getFrequency(); + uint64_t MinFreq = + static_cast<uint64_t>(EntryFreq * CoroElideBranchRatio); - if (Prob < MinBranchProbability) { + if (BlockFreq < MinFreq) { ORE.emit([&]() { return OptimizationRemarkMissed( DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller) << "'" << ore::NV("callee", Callee->getName()) << "' not elided in '" << ore::NV("caller", Caller->getName()) - << "' because of low probability: " - << ore::NV("probability", Prob) << " (threshold: " - << ore::NV("threshold", MinBranchProbability) << ")"; + << "' because of low frequency: " + << ore::NV("block_freq", BlockFreq) + << " (threshold: " << ore::NV("min_freq", MinFreq) << ")"; }); continue; } @@ -188,7 +185,8 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C, return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller) << "'" << ore::NV("callee", Callee->getName()) << "' elided in '" << ore::NV("caller", Caller->getName()) - << "' (probability: " << ore::NV("probability", Prob) << ")"; + << "' (block_freq: " << ore::NV("block_freq", BlockFreq) + << ")"; }); FAM.invalidate(*Caller, PreservedAnalyses::none()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 4c9b10a..cdc559b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -156,9 +156,9 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); Type *Ty = CI.getType(); - if (auto *SrcC = dyn_cast<Constant>(Src)) - if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL)) - return replaceInstUsesWith(CI, Res); + if (Value *Res = + simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI))) + return replaceInstUsesWith(CI, Res); // Try to eliminate a cast of a cast. if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 511bca4..6e17801 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -605,17 +605,16 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, return Mapping; } -namespace llvm { -void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize, - bool IsKasan, uint64_t *ShadowBase, - int *MappingScale, bool *OrShadowOffset) { +void llvm::getAddressSanitizerParams(const Triple &TargetTriple, int LongSize, + bool IsKasan, uint64_t *ShadowBase, + int *MappingScale, bool *OrShadowOffset) { auto Mapping = getShadowMapping(TargetTriple, LongSize, IsKasan); *ShadowBase = Mapping.Offset; *MappingScale = Mapping.Scale; *OrShadowOffset = Mapping.OrShadowOffset; } -void removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) { +void llvm::removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) { // Sanitizer checks read from shadow, which invalidates memory(argmem: *). // // This is not only true for sanitized functions, because AttrInfer can @@ -668,8 +667,6 @@ ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel, AccessSizeIndex(AccessSizeIndex), IsWrite(IsWrite), CompileKernel(CompileKernel) {} -} // namespace llvm - static uint64_t getRedzoneSizeForScale(int MappingScale) { // Redzone used for stack and globals is at least 32 bytes. // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively. @@ -677,11 +674,10 @@ static uint64_t getRedzoneSizeForScale(int MappingScale) { } static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) { - if (TargetTriple.isOSEmscripten()) { + if (TargetTriple.isOSEmscripten()) return kAsanEmscriptenCtorAndDtorPriority; - } else { + else return kAsanCtorAndDtorPriority; - } } static Twine genName(StringRef suffix) { diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp index 7c78eb3..72e8e50 100644 --- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp @@ -396,9 +396,8 @@ class CHR { } // end anonymous namespace -static inline -raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, - const CHRStats &Stats) { +[[maybe_unused]] static inline raw_ostream &operator<<(raw_ostream &OS, + const CHRStats &Stats) { Stats.print(OS); return OS; } @@ -425,8 +424,8 @@ static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) { return PSI.isFunctionEntryHot(&F); } -static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, - CHRStats *Stats) { +[[maybe_unused]] static void dumpIR(Function &F, const char *Label, + CHRStats *Stats) { StringRef FuncName = F.getName(); StringRef ModuleName = F.getParent()->getName(); (void)(FuncName); // Unused in release build. @@ -1622,7 +1621,7 @@ static void insertTrivialPHIs(CHRScope *Scope, } // Assert that all the CHR regions of the scope have a biased branch or select. -static void LLVM_ATTRIBUTE_UNUSED +[[maybe_unused]] static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { #ifndef NDEBUG auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { @@ -1644,8 +1643,9 @@ assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { // Assert that all the condition values of the biased branches and selects have // been hoisted to the pre-entry block or outside of the scope. -static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( - CHRScope *Scope, BasicBlock *PreEntryBlock) { +[[maybe_unused]] static void +assertBranchOrSelectConditionHoisted(CHRScope *Scope, + BasicBlock *PreEntryBlock) { CHR_DEBUG(dbgs() << "Biased regions condition values \n"); for (RegInfo &RI : Scope->CHRRegions) { Region *R = RI.R; @@ -2007,8 +2007,8 @@ void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { } } -static void LLVM_ATTRIBUTE_UNUSED -dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { +[[maybe_unused]] static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, + const char *Label) { dbgs() << Label << " " << Scopes.size() << "\n"; for (CHRScope *Scope : Scopes) { dbgs() << *Scope << "\n"; @@ -2092,8 +2092,6 @@ bool CHR::run() { return Changed; } -namespace llvm { - ControlHeightReductionPass::ControlHeightReductionPass() { parseCHRFilterFiles(); } @@ -2116,5 +2114,3 @@ PreservedAnalyses ControlHeightReductionPass::run( return PreservedAnalyses::all(); return PreservedAnalyses::none(); } - -} // namespace llvm diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 09db464..386e48f 100644 --- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -326,8 +326,7 @@ const unsigned BBState::OverflowOccurredValue = 0xffffffff; namespace llvm { -raw_ostream &operator<<(raw_ostream &OS, - BBState &BBState) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, BBState &BBState); } // end namespace llvm diff --git a/llvm/lib/Transforms/ObjCARC/PtrState.h b/llvm/lib/Transforms/ObjCARC/PtrState.h index 232db2b..5cc4212 100644 --- a/llvm/lib/Transforms/ObjCARC/PtrState.h +++ b/llvm/lib/Transforms/ObjCARC/PtrState.h @@ -47,8 +47,7 @@ enum Sequence { S_MovableRelease ///< objc_release(x), !clang.imprecise_release. }; -raw_ostream &operator<<(raw_ostream &OS, - const Sequence S) LLVM_ATTRIBUTE_UNUSED; +[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const Sequence S); /// Unidirectional information about either a /// retain-decrement-use-release sequence or release-use-decrement-retain diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 7ad710d..6141b6d 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -77,6 +77,7 @@ #include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" @@ -805,9 +806,8 @@ tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI, return nullptr; } -namespace { // Returns true if \p I is an intrinsic that does not read or write memory. -bool isNoopIntrinsic(Instruction *I) { +static bool isNoopIntrinsic(Instruction *I) { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: @@ -828,7 +828,7 @@ bool isNoopIntrinsic(Instruction *I) { } // Check if we can ignore \p D for DSE. -bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { +static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { Instruction *DI = D->getMemoryInst(); // Calls that only access inaccessible memory cannot read or write any memory // locations we consider for elimination. @@ -856,6 +856,8 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) { return false; } +namespace { + // A memory location wrapper that represents a MemoryLocation, `MemLoc`, // defined by `MemDef`. struct MemoryLocationWrapper { @@ -889,23 +891,25 @@ struct MemoryDefWrapper { SmallVector<MemoryLocationWrapper, 1> DefinedLocations; }; -bool hasInitializesAttr(Instruction *I) { - CallBase *CB = dyn_cast<CallBase>(I); - return CB && CB->getArgOperandWithAttribute(Attribute::Initializes); -} - struct ArgumentInitInfo { unsigned Idx; bool IsDeadOrInvisibleOnUnwind; ConstantRangeList Inits; }; +} // namespace + +static bool hasInitializesAttr(Instruction *I) { + CallBase *CB = dyn_cast<CallBase>(I); + return CB && CB->getArgOperandWithAttribute(Attribute::Initializes); +} // Return the intersected range list of the initializes attributes of "Args". // "Args" are call arguments that alias to each other. // If any argument in "Args" doesn't have dead_on_unwind attr and // "CallHasNoUnwindAttr" is false, return empty. -ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, - bool CallHasNoUnwindAttr) { +static ConstantRangeList +getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, + bool CallHasNoUnwindAttr) { if (Args.empty()) return {}; @@ -925,6 +929,8 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args, return IntersectedIntervals; } +namespace { + struct DSEState { Function &F; AliasAnalysis &AA; @@ -2328,10 +2334,11 @@ struct DSEState { // change state: whether make any change. bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper); }; +} // namespace // Return true if "Arg" is function local and isn't captured before "CB". -bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, - EarliestEscapeAnalysis &EA) { +static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB, + EarliestEscapeAnalysis &EA) { const Value *UnderlyingObj = getUnderlyingObject(Arg); return isIdentifiedFunctionLocal(UnderlyingObj) && capturesNothing( @@ -2627,7 +2634,6 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, return MadeChange; } -} // end anonymous namespace //===----------------------------------------------------------------------===// // DSE Pass @@ -2728,8 +2734,6 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false, false) -namespace llvm { -LLVM_ABI FunctionPass *createDeadStoreEliminationPass() { +LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSELegacyPass(); } -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp index 1c88532..b9534def 100644 --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -73,24 +73,17 @@ #include <utility> using namespace llvm; +using namespace llvm::GVNExpression; #define DEBUG_TYPE "gvn-sink" STATISTIC(NumRemoved, "Number of instructions removed"); -namespace llvm { -namespace GVNExpression { - LLVM_DUMP_METHOD void Expression::dump() const { print(dbgs()); dbgs() << "\n"; } -} // end namespace GVNExpression -} // end namespace llvm - -namespace { - static bool isMemoryInst(const Instruction *I) { return isa<LoadInst>(I) || isa<StoreInst>(I) || (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) || @@ -99,6 +92,8 @@ static bool isMemoryInst(const Instruction *I) { //===----------------------------------------------------------------------===// +namespace { + /// Candidate solution for sinking. There may be different ways to /// sink instructions, differing in the number of instructions sunk, /// the number of predecessors sunk from and the number of PHIs @@ -125,14 +120,6 @@ struct SinkingInstructionCandidate { } }; -#ifndef NDEBUG -raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) { - OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks - << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; - return OS; -} -#endif - //===----------------------------------------------------------------------===// /// Describes a PHI node that may or may not exist. These track the PHIs @@ -256,8 +243,18 @@ public: return Values == Other.Values && Blocks == Other.Blocks; } }; +} // namespace -template <typename ModelledPHI> struct DenseMapInfo { +#ifndef NDEBUG +static raw_ostream &operator<<(raw_ostream &OS, + const SinkingInstructionCandidate &C) { + OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks + << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; + return OS; +} +#endif + +template <> struct llvm::DenseMapInfo<ModelledPHI> { static inline ModelledPHI &getEmptyKey() { static ModelledPHI Dummy = ModelledPHI::createDummy(0); return Dummy; @@ -275,7 +272,9 @@ template <typename ModelledPHI> struct DenseMapInfo { } }; -using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>; +using ModelledPHISet = DenseSet<ModelledPHI>; + +namespace { //===----------------------------------------------------------------------===// // ValueTable @@ -290,7 +289,7 @@ using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>; /// /// This class also contains fields for discriminators used when determining /// equivalence of instructions with sideeffects. -class InstructionUseExpr : public GVNExpression::BasicExpression { +class InstructionUseExpr : public BasicExpression { unsigned MemoryUseOrder = -1; bool Volatile = false; ArrayRef<int> ShuffleMask; @@ -298,7 +297,7 @@ class InstructionUseExpr : public GVNExpression::BasicExpression { public: InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R, BumpPtrAllocator &A) - : GVNExpression::BasicExpression(I->getNumUses()) { + : BasicExpression(I->getNumUses()) { allocateOperands(R, A); setOpcode(I->getOpcode()); setType(I->getType()); @@ -315,8 +314,8 @@ public: void setVolatile(bool V) { Volatile = V; } hash_code getHashValue() const override { - return hash_combine(GVNExpression::BasicExpression::getHashValue(), - MemoryUseOrder, Volatile, ShuffleMask); + return hash_combine(BasicExpression::getHashValue(), MemoryUseOrder, + Volatile, ShuffleMask); } template <typename Function> hash_code getHashValue(Function MapFn) { @@ -332,7 +331,7 @@ using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>; class ValueTable { DenseMap<Value *, uint32_t> ValueNumbering; - DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering; + DenseMap<Expression *, uint32_t> ExpressionNumbering; DenseMap<size_t, uint32_t> HashNumbering; BumpPtrAllocator Allocator; ArrayRecycler<Value *> Recycler; @@ -594,6 +593,7 @@ private: } } }; +} // namespace std::optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI, @@ -851,8 +851,6 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, NumRemoved += Insts.size() - 1; } -} // end anonymous namespace - PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { GVNSink G; if (!G.run(F)) diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp index d99f1eb..ddb99a5 100644 --- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -75,8 +75,6 @@ static cl::opt<bool> "expressed as branches by widenable conditions"), cl::init(true)); -namespace { - // Get the condition of \p I. It can either be a guard or a conditional branch. static Value *getCondition(Instruction *I) { if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { @@ -130,6 +128,8 @@ findInsertionPointForWideCondition(Instruction *WCOrGuard) { return std::nullopt; } +namespace { + class GuardWideningImpl { DominatorTree &DT; PostDominatorTree *PDT; @@ -328,7 +328,7 @@ public: /// The entry point for this pass. bool run(); }; -} +} // namespace static bool isSupportedGuardInstruction(const Instruction *Insn) { if (isGuard(Insn)) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index c327311..7ebcc21 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -53,6 +53,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" @@ -117,6 +118,10 @@ static cl::opt<bool> LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), cl::desc("Predicate conditions in read only loops")); +static cl::opt<bool> LoopPredicationTraps( + "indvars-predicate-loop-traps", cl::Hidden, cl::init(true), + cl::desc("Predicate conditions that trap in loops with only local writes")); + static cl::opt<bool> AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true), cl::desc("Allow widening of indvars to eliminate s/zext")); @@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { return Changed; } +static bool crashingBBWithoutEffect(const BasicBlock &BB) { + return llvm::all_of(BB, [](const Instruction &I) { + // TODO: for now this is overly restrictive, to make sure nothing in this + // BB can depend on the loop body. + // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a + // load does not have a side effect, but we could have + // %a = load ptr, ptr %ptr + // %b = load i32, ptr %a + // Now if the loop stored a non-nullptr to %a, we could cause a nullptr + // dereference by skipping over loop iterations. + if (const auto *CB = dyn_cast<CallBase>(&I)) { + if (CB->onlyAccessesInaccessibleMemory()) + return true; + } + return isa<UnreachableInst>(I); + }); +} + bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector<BasicBlock*, 16> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { // suggestions on how to improve this? I can obviously bail out for outer // loops, but that seems less than ideal. MemorySSA can find memory writes, // is that enough for *all* side effects? + bool HasThreadLocalSideEffects = false; for (BasicBlock *BB : L->blocks()) for (auto &I : *BB) // TODO:isGuaranteedToTransfer - if (I.mayHaveSideEffects()) - return false; + if (I.mayHaveSideEffects()) { + if (!LoopPredicationTraps) + return false; + HasThreadLocalSideEffects = true; + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { + // Simple stores cannot be observed by other threads. + // If HasThreadLocalSideEffects is set, we check + // crashingBBWithoutEffect to make sure that the crashing BB cannot + // observe them either. + if (!SI->isSimple()) + return false; + } else { + return false; + } + } bool Changed = false; // Finally, do the actual predication for all predicatable blocks. A couple @@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); auto *BI = cast<BranchInst>(ExitingBB->getTerminator()); + if (HasThreadLocalSideEffects) { + const BasicBlock *Unreachable = nullptr; + for (const BasicBlock *Succ : BI->successors()) { + if (isa<UnreachableInst>(Succ->getTerminator())) + Unreachable = Succ; + } + // Exit BB which have one branch back into the loop and another one to + // a trap can still be optimized, because local side effects cannot + // be observed in the exit case (the trap). We could be smarter about + // this, but for now lets pattern match common cases that directly trap. + if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable)) + return Changed; + } Value *NewCond; if (ExitCount == ExactBTC) { NewCond = L->contains(BI->getSuccessor(0)) ? diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp index 3c14036e..6fb8197 100644 --- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp +++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp @@ -26,8 +26,6 @@ using namespace llvm; -namespace llvm { - static cl::opt<unsigned> JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden, cl::desc("Only split jump tables with size less or " @@ -43,8 +41,8 @@ static cl::opt<unsigned> FunctionSizeThreshold( "or equal than this threshold."), cl::init(50)); +namespace llvm { extern cl::opt<bool> ProfcheckDisableMetadataFixes; - } // end namespace llvm #define DEBUG_TYPE "jump-table-to-switch" diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 9655173..b2c526b 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -116,8 +116,6 @@ STATISTIC(NumIntAssociationsHoisted, STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions " "reassociated and hoisted out of the loop"); -namespace llvm { - /// Memory promotion is enabled by default. static cl::opt<bool> DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), @@ -156,7 +154,7 @@ static cl::opt<unsigned> IntAssociationUpperLimit( // which may not be precise, since optimizeUses is capped. The result is // correct, but we may not get as "far up" as possible to get which access is // clobbering the one queried. -cl::opt<unsigned> SetLicmMssaOptCap( +cl::opt<unsigned> llvm::SetLicmMssaOptCap( "licm-mssa-optimization-cap", cl::init(100), cl::Hidden, cl::desc("Enable imprecision in LICM in pathological cases, in exchange " "for faster compile. Caps the MemorySSA clobbering calls.")); @@ -164,15 +162,15 @@ cl::opt<unsigned> SetLicmMssaOptCap( // Experimentally, memory promotion carries less importance than sinking and // hoisting. Limit when we do promotion when using MemorySSA, in order to save // compile time. -cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap( +cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap( "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden, cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " "effect. When MSSA in LICM is enabled, then this is the maximum " "number of accesses allowed to be present in a loop in order to " "enable memory promotion.")); +namespace llvm { extern cl::opt<bool> ProfcheckDisableMetadataFixes; - } // end namespace llvm static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); @@ -1120,11 +1118,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, return false; } -namespace { /// Return true if-and-only-if we know how to (mechanically) both hoist and /// sink a given instruction out of a loop. Does not address legality /// concerns such as aliasing or speculation safety. -bool isHoistableAndSinkableInst(Instruction &I) { +static bool isHoistableAndSinkableInst(Instruction &I) { // Only these instructions are hoistable/sinkable. return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) || isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) || @@ -1136,8 +1133,8 @@ bool isHoistableAndSinkableInst(Instruction &I) { } /// Return true if I is the only Instruction with a MemoryAccess in L. -bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, - const MemorySSAUpdater &MSSAU) { +static bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, + const MemorySSAUpdater &MSSAU) { for (auto *BB : L->getBlocks()) if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) { int NotAPhi = 0; @@ -1151,7 +1148,6 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, } return true; } -} static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA, BatchAAResults &BAA, diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp index 73f1942..7706de8 100644 --- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp +++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp @@ -21,8 +21,7 @@ #define DEBUG_TYPE "loop-bound-split" -namespace llvm { - +using namespace llvm; using namespace PatternMatch; namespace { @@ -358,8 +357,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI, IRBuilder<> Builder(&PostLoopPreHeader->front()); // Update phi nodes in header of post-loop. - bool isExitingLatch = - (L.getExitingBlock() == L.getLoopLatch()) ? true : false; + bool isExitingLatch = L.getExitingBlock() == L.getLoopLatch(); Value *ExitingCondLCSSAPhi = nullptr; for (PHINode &PN : L.getHeader()->phis()) { // Create LCSSA phi node in preheader of post-loop. @@ -472,8 +470,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI, PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U) { - Function &F = *L.getHeader()->getParent(); - (void)F; + [[maybe_unused]] Function &F = *L.getHeader()->getParent(); LLVM_DEBUG(dbgs() << "Spliting bound of loop in " << F.getName() << ": " << L << "\n"); @@ -486,5 +483,3 @@ PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM, return getLoopPassPreservedAnalyses(); } - -} // end namespace llvm diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp index 32078b1..7da8586 100644 --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -16,8 +16,6 @@ using namespace llvm; -namespace llvm { - /// Explicitly specialize the pass manager's run method to handle loop nest /// structure updates. PreservedAnalyses @@ -185,7 +183,6 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM, } return PA; } -} // namespace llvm void FunctionToLoopPassAdaptor::printPipeline( raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { @@ -193,6 +190,7 @@ void FunctionToLoopPassAdaptor::printPipeline( Pass->printPipeline(OS, MapClassName2PassName); OS << ')'; } + PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, FunctionAnalysisManager &AM) { // Before we even compute any loop analyses, first run a miniature function diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 448dc2b..f3e6cbf 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -540,8 +540,6 @@ bool LoopVersioningLICM::run(DominatorTree *DT) { return Changed; } -namespace llvm { - PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &LAR, LPMUpdater &U) { @@ -556,4 +554,3 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM, return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); } -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 80aa98d..5a8f18a 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -160,9 +160,6 @@ static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true), //===----------------------------------------------------------------------===// // Anchor methods. -namespace llvm { -namespace GVNExpression { - Expression::~Expression() = default; BasicExpression::~BasicExpression() = default; CallExpression::~CallExpression() = default; @@ -171,9 +168,6 @@ StoreExpression::~StoreExpression() = default; AggregateValueExpression::~AggregateValueExpression() = default; PHIExpression::~PHIExpression() = default; -} // end namespace GVNExpression -} // end namespace llvm - namespace { // Tarjan's SCC finding algorithm with Nuutila's improvements diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index ba58b8e..6d7ce36 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -2623,32 +2623,32 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) { namespace { - class ReassociateLegacyPass : public FunctionPass { - ReassociatePass Impl; +class ReassociateLegacyPass : public FunctionPass { + ReassociatePass Impl; - public: - static char ID; // Pass identification, replacement for typeid +public: + static char ID; // Pass identification, replacement for typeid - ReassociateLegacyPass() : FunctionPass(ID) { - initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry()); - } + ReassociateLegacyPass() : FunctionPass(ID) { + initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; - FunctionAnalysisManager DummyFAM; - auto PA = Impl.run(F, DummyFAM); - return !PA.areAllPreserved(); - } + FunctionAnalysisManager DummyFAM; + auto PA = Impl.run(F, DummyFAM); + return !PA.areAllPreserved(); + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addPreserved<AAResultsWrapperPass>(); - AU.addPreserved<BasicAAWrapperPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } - }; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; } // end anonymous namespace diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 578fec7..a692009 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -551,12 +551,10 @@ public: } /// Support comparison with a single offset to allow binary searches. - friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS, - uint64_t RHSOffset) { + [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) { return LHS.beginOffset() < RHSOffset; } - friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, - const Slice &RHS) { + [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) { return LHSOffset < RHS.beginOffset(); } diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index aae5d60..25a531c 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -50,9 +50,7 @@ using namespace llvm; #define DEBUG_TYPE "scalarizer" -namespace { - -BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { +static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { BasicBlock *BB = Itr->getParent(); if (isa<PHINode>(Itr)) Itr = BB->getFirstInsertionPt(); @@ -76,6 +74,8 @@ using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>; // along with a pointer to their scattered forms. using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>; +namespace { + struct VectorSplit { // The type of the vector. FixedVectorType *VecTy = nullptr; @@ -196,6 +196,7 @@ struct VectorLayout { // The size of each (non-remainder) fragment in bytes. uint64_t SplitSize = 0; }; +} // namespace static bool isStructOfMatchingFixedVectors(Type *Ty) { if (!isa<StructType>(Ty)) @@ -268,6 +269,7 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments, return Res; } +namespace { class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> { public: ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI, diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp index ebcbd2b..fa66a03 100644 --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -149,8 +149,6 @@ bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) { return Impl.runImpl(F, TTI); } -namespace llvm { - bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) { if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) { LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because " @@ -328,11 +326,11 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( return true; } -FunctionPass *createSpeculativeExecutionPass() { +FunctionPass *llvm::createSpeculativeExecutionPass() { return new SpeculativeExecutionLegacyPass(); } -FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() { +FunctionPass *llvm::createSpeculativeExecutionIfHasBranchDivergencePass() { return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true); } @@ -362,4 +360,3 @@ void SpeculativeExecutionPass::printPipeline( OS << "only-if-divergent-target"; OS << '>'; } -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 7d01709..e94ad19 100644 --- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -716,8 +716,6 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { return Ret; } -namespace llvm { - PreservedAnalyses StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) { const DataLayout *DL = &F.getDataLayout(); @@ -735,5 +733,3 @@ StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) { PA.preserve<TargetIRAnalysis>(); return PA; } - -} // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 1d83ddc..89d41f3e 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -192,7 +192,7 @@ struct AllocaDerivedValueTracker { SmallPtrSet<Instruction *, 32> AllocaUsers; SmallPtrSet<Instruction *, 32> EscapePoints; }; -} +} // namespace static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { if (F.callsFunctionThatReturnsTwice()) @@ -967,7 +967,7 @@ struct TailCallElim : public FunctionPass { /*BFI=*/nullptr); } }; -} +} // namespace char TailCallElim::ID = 0; INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination", diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index b18acea..4fe736a 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -1106,7 +1106,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } Phi.replaceAllUsesWith(RdxResult); - continue; } } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b96d29e..280eb20 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7957,9 +7957,9 @@ bool VPRecipeBuilder::getScaledReductions( auto CollectExtInfo = [this, &Exts, &ExtOpTypes, &ExtKinds](SmallVectorImpl<Value *> &Ops) -> bool { for (const auto &[I, OpI] : enumerate(Ops)) { - auto *CI = dyn_cast<ConstantInt>(OpI); - if (I > 0 && CI && - canConstantBeExtended(CI, ExtOpTypes[0], ExtKinds[0])) { + const APInt *C; + if (I > 0 && match(OpI, m_APInt(C)) && + canConstantBeExtended(C, ExtOpTypes[0], ExtKinds[0])) { ExtOpTypes[I] = ExtOpTypes[0]; ExtKinds[I] = ExtKinds[0]; continue; @@ -8240,14 +8240,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // the vector loop or when not folding the tail. In the later case, we know // that the canonical induction increment will not overflow as the vector trip // count is >= increment and a multiple of the increment. + VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion(); bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None; if (!HasNUW) { - auto *IVInc = Plan->getVectorLoopRegion() - ->getExitingBasicBlock() - ->getTerminator() - ->getOperand(0); - assert(match(IVInc, m_VPInstruction<Instruction::Add>( - m_Specific(Plan->getCanonicalIV()), m_VPValue())) && + auto *IVInc = + LoopRegion->getExitingBasicBlock()->getTerminator()->getOperand(0); + assert(match(IVInc, + m_VPInstruction<Instruction::Add>( + m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) && "Did not find the canonical IV increment"); cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags(); } @@ -8293,7 +8293,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. - VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion(); VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( HeaderVPBB); @@ -8377,8 +8376,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( for (VPValue *Old : Old2New.keys()) Old->getDefiningRecipe()->eraseFromParent(); - assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) && - !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() && + assert(isa<VPRegionBlock>(LoopRegion) && + !LoopRegion->getEntryBasicBlock()->empty() && "entry block must be set to a VPRegionBlock having a non-empty entry " "VPBasicBlock"); @@ -9326,8 +9325,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { if (ResumePhiIter == MainScalarPH->phis().end()) { VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); ResumePhi = ScalarPHBuilder.createScalarPhi( - {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {}, - "vec.epilog.resume.val"); + {VectorTC, + MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()}, + {}, "vec.epilog.resume.val"); } else { ResumePhi = cast<VPPhi>(&*ResumePhiIter); if (MainScalarPH->begin() == MainScalarPH->end()) @@ -9354,7 +9354,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); Header->setName("vec.epilog.vector.body"); - VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV(); + VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV(); // When vectorizing the epilogue loop, the canonical induction needs to be // adjusted by the value after the main vector loop. Find the resume value // created during execution of the main VPlan. It must be the first phi in the diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 0101942..d167009 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1753,14 +1753,14 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) { } #endif -bool llvm::canConstantBeExtended(const ConstantInt *CI, Type *NarrowType, +bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind) { - APInt TruncatedVal = CI->getValue().trunc(NarrowType->getScalarSizeInBits()); - unsigned WideSize = CI->getType()->getScalarSizeInBits(); + APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits()); + unsigned WideSize = C->getBitWidth(); APInt ExtendedVal = ExtKind == TTI::PR_SignExtend ? TruncatedVal.sext(WideSize) : TruncatedVal.zext(WideSize); - return ExtendedVal == CI->getValue(); + return ExtendedVal == *C; } TargetTransformInfo::OperandValueInfo diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 23f5623..0e0b042 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1012,6 +1012,8 @@ public: // part if scalar. In the latter case, the recipe will be removed during // unrolling. ExtractLastElement, + // Extracts the last lane for each part from its operand. + ExtractLastLanePerPart, // Extracts the second-to-last lane from its operand or the second-to-last // part if it is scalar. In the latter case, the recipe will be removed // during unrolling. @@ -4058,6 +4060,19 @@ public: /// Remove the current region from its VPlan, connecting its predecessor to /// its entry, and its exiting block to its successor. void dissolveToCFGLoop(); + + /// Returns the canonical induction recipe of the region. + VPCanonicalIVPHIRecipe *getCanonicalIV() { + VPBasicBlock *EntryVPBB = getEntryBasicBlock(); + if (EntryVPBB->empty()) { + // VPlan native path. TODO: Unify both code paths. + EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor()); + } + return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin()); + } + const VPCanonicalIVPHIRecipe *getCanonicalIV() const { + return const_cast<VPRegionBlock *>(this)->getCanonicalIV(); + } }; /// VPlan models a candidate for vectorization, encoding various decisions take @@ -4252,12 +4267,14 @@ public: BackedgeTakenCount = new VPValue(); return BackedgeTakenCount; } + VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; } /// The vector trip count. VPValue &getVectorTripCount() { return VectorTripCount; } /// Returns the VF of the vector loop region. VPValue &getVF() { return VF; }; + const VPValue &getVF() const { return VF; }; /// Returns VF * UF of the vector loop region. VPValue &getVFxUF() { return VFxUF; } @@ -4369,16 +4386,6 @@ public: LLVM_DUMP_METHOD void dump() const; #endif - /// Returns the canonical induction recipe of the vector loop. - VPCanonicalIVPHIRecipe *getCanonicalIV() { - VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock(); - if (EntryVPBB->empty()) { - // VPlan native path. - EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor()); - } - return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin()); - } - VPValue *getSCEVExpansion(const SCEV *S) const { return SCEVToExpansion.lookup(S); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 07bfe7a..f413c63 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -116,6 +116,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { case VPInstruction::FirstActiveLane: return Type::getIntNTy(Ctx, 64); case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractPenultimateElement: { Type *BaseTy = inferScalarType(R->getOperand(0)); if (auto *VecTy = dyn_cast<VectorType>(BaseTy)) diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index c0147ce..332791a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -658,9 +658,11 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, } VPIRMetadata VPBranchWeights; - auto *Term = VPBuilder(CheckBlockVPBB) - .createNaryOp(VPInstruction::BranchOnCond, {CondVPV}, - Plan.getCanonicalIV()->getDebugLoc()); + auto *Term = + VPBuilder(CheckBlockVPBB) + .createNaryOp( + VPInstruction::BranchOnCond, {CondVPV}, + Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc()); if (AddBranchWeights) { MDBuilder MDB(Plan.getContext()); MDNode *BranchWeights = @@ -921,8 +923,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) { if (DerivedIV->getNumUsers() == 1 && DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) { - auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), - &Plan.getVectorTripCount()); + auto *NewSel = Builder.createSelect( + AnyNaN, LoopRegion->getCanonicalIV(), &Plan.getVectorTripCount()); DerivedIV->moveAfter(&*Builder.getInsertPoint()); DerivedIV->setOperand(1, NewSel); continue; @@ -935,7 +937,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { "FMaxNum/FMinNum reduction.\n"); return false; } - auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV); + auto *NewSel = + Builder.createSelect(AnyNaN, LoopRegion->getCanonicalIV(), VecV); ResumeR->setOperand(0, NewSel); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 1580a3b..2aaabd9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -474,7 +474,7 @@ public: /// Check if a constant \p CI can be safely treated as having been extended /// from a narrower type with the given extension kind. -bool canConstantBeExtended(const ConstantInt *CI, Type *NarrowType, +bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind); } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index b42b049..d8203e2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -173,10 +173,10 @@ inline int_pred_ty<is_zero_int> m_ZeroInt() { /// For vectors, this includes constants with undefined elements. inline int_pred_ty<is_one> m_One() { return int_pred_ty<is_one>(); } -struct bind_const_int { - uint64_t &Res; +struct bind_apint { + const APInt *&Res; - bind_const_int(uint64_t &Res) : Res(Res) {} + bind_apint(const APInt *&Res) : Res(Res) {} bool match(VPValue *VPV) const { if (!VPV->isLiveIn()) @@ -188,7 +188,23 @@ struct bind_const_int { const auto *CI = dyn_cast<ConstantInt>(V); if (!CI) return false; - if (auto C = CI->getValue().tryZExtValue()) { + Res = &CI->getValue(); + return true; + } +}; + +inline bind_apint m_APInt(const APInt *&C) { return C; } + +struct bind_const_int { + uint64_t &Res; + + bind_const_int(uint64_t &Res) : Res(Res) {} + + bool match(VPValue *VPV) const { + const APInt *APConst; + if (!bind_apint(APConst).match(VPV)) + return false; + if (auto C = APConst->tryZExtValue()) { Res = *C; return true; } @@ -372,6 +388,12 @@ m_ExtractLastElement(const Op0_t &Op0) { return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0); } +template <typename Op0_t> +inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t> +m_ExtractLastLanePerPart(const Op0_t &Op0) { + return m_VPInstruction<VPInstruction::ExtractLastLanePerPart>(Op0); +} + template <typename Op0_t, typename Op1_t, typename Op2_t> inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t> m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { @@ -394,6 +416,12 @@ m_AnyOf(const Op0_t &Op0) { return m_VPInstruction<VPInstruction::AnyOf>(Op0); } +template <typename Op0_t> +inline VPInstruction_match<VPInstruction::FirstActiveLane, Op0_t> +m_FirstActiveLane(const Op0_t &Op0) { + return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0); +} + template <unsigned Opcode, typename Op0_t> inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) { return AllRecipe_match<Opcode, Op0_t>(Op0); diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index 0c27d53..fb17d5d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -168,7 +168,8 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) { // non-phi instructions. auto &Plan = *HeaderVPBB->getPlan(); - auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV()); + auto *IV = + new VPWidenCanonicalIVRecipe(HeaderVPBB->getParent()->getCanonicalIV()); Builder.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi()); Builder.insert(IV); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2368d18..7a98c75 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -341,12 +341,12 @@ VPPartialReductionRecipe::computeCost(ElementCount VF, ExtAType = GetExtendKind(ExtAR); ExtBType = GetExtendKind(ExtBR); - if (!ExtBR && Widen->getOperand(1)->isLiveIn()) { - auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue()); - if (canConstantBeExtended(CI, InputTypeA, ExtAType)) { - InputTypeB = InputTypeA; - ExtBType = ExtAType; - } + using namespace VPlanPatternMatch; + const APInt *C; + if (!ExtBR && match(Widen->getOperand(1), m_APInt(C)) && + canConstantBeExtended(C, InputTypeA, ExtAType)) { + InputTypeB = InputTypeA; + ExtBType = ExtAType; } }; @@ -511,6 +511,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::ExplicitVectorLength: case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractPenultimateElement: case VPInstruction::FirstActiveLane: case VPInstruction::Not: @@ -878,9 +879,11 @@ Value *VPInstruction::generate(VPTransformState &State) { return ReducedPartRdx; } + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractLastElement: case VPInstruction::ExtractPenultimateElement: { - unsigned Offset = getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2; + unsigned Offset = + getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1; Value *Res; if (State.VF.isVector()) { assert(Offset <= State.VF.getKnownMinValue() && @@ -1166,6 +1169,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF, bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractLastElement || + getOpcode() == VPInstruction::ExtractLastLanePerPart || getOpcode() == VPInstruction::ExtractPenultimateElement || getOpcode() == Instruction::ExtractElement || getOpcode() == VPInstruction::ExtractLane || @@ -1229,6 +1233,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::ExtractLane: case VPInstruction::ExtractLastElement: + case VPInstruction::ExtractLastLanePerPart: case VPInstruction::ExtractPenultimateElement: case VPInstruction::ActiveLaneMask: case VPInstruction::FirstActiveLane: @@ -1376,6 +1381,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ExtractLastElement: O << "extract-last-element"; break; + case VPInstruction::ExtractLastLanePerPart: + O << "extract-last-lane-per-part"; + break; case VPInstruction::ExtractPenultimateElement: O << "extract-penultimate-element"; break; @@ -2344,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const { return false; auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue()); auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); - auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin()); + auto *CanIV = getParent()->getParent()->getCanonicalIV(); return StartC && StartC->isZero() && StepC && StepC->isOne() && getScalarType() == CanIV->getScalarType(); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 40b7e8d..cae9aee8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -501,7 +501,8 @@ static void removeRedundantInductionCasts(VPlan &Plan) { /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV /// recipe, if it exists. static void removeRedundantCanonicalIVs(VPlan &Plan) { - VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV(); VPWidenCanonicalIVRecipe *WidenNewIV = nullptr; for (VPUser *U : CanonicalIV->users()) { WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U); @@ -512,7 +513,7 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) { if (!WidenNewIV) return; - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); for (VPRecipeBase &Phi : HeaderVPBB->phis()) { auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); @@ -582,8 +583,9 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, Instruction *TruncI, VPValue *StartV, VPValue *Step, DebugLoc DL, VPBuilder &Builder) { - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); - VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); + VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV(); VPSingleDefRecipe *BaseIV = Builder.createDerivedIV( Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx"); @@ -786,9 +788,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, ScalarEvolution &SE) { VPValue *Incoming, *Mask; if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>( - m_VPInstruction<VPInstruction::FirstActiveLane>( - m_VPValue(Mask)), - m_VPValue(Incoming)))) + m_FirstActiveLane(m_VPValue(Mask)), m_VPValue(Incoming)))) return nullptr; auto *WideIV = getOptimizableIVOf(Incoming, SE); @@ -800,8 +800,9 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, return nullptr; // Calculate the final index. - VPValue *EndValue = Plan.getCanonicalIV(); - auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + auto *CanonicalIV = LoopRegion->getCanonicalIV(); + Type *CanonicalIVType = CanonicalIV->getScalarType(); VPBuilder B(cast<VPBasicBlock>(PredVPBB)); DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc(); @@ -810,7 +811,8 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, Type *FirstActiveLaneType = TypeInfo.inferScalarType(FirstActiveLane); FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType, FirstActiveLaneType, DL); - EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL); + VPValue *EndValue = + B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL); // `getOptimizableIVOf()` always returns the pre-incremented IV, so if it // changed it means the exit is using the incremented value, so we need to @@ -1205,7 +1207,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { } // Look through ExtractLastElement (BuildVector ....). - if (match(&R, m_ExtractLastElement(m_BuildVector()))) { + if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()), + m_ExtractLastLanePerPart(m_BuildVector())))) { auto *BuildVector = cast<VPInstruction>(R.getOperand(0)); Def->replaceAllUsesWith( BuildVector->getOperand(BuildVector->getNumOperands() - 1)); @@ -1271,13 +1274,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } - if (match(Def, m_ExtractLastElement(m_Broadcast(m_VPValue(A))))) { + if (match(Def, + m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))), + m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) { Def->replaceAllUsesWith(A); return; } - if (match(Def, - m_VPInstruction<VPInstruction::ExtractLastElement>(m_VPValue(A))) && + if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)), + m_ExtractLastLanePerPart(m_VPValue(A)))) && ((isa<VPInstruction>(A) && vputils::isSingleScalar(A)) || (isa<VPReplicateRecipe>(A) && cast<VPReplicateRecipe>(A)->isSingleScalar())) && @@ -1285,6 +1290,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) { return Def->replaceAllUsesWith(A); } + + if (Plan->getUF() == 1 && + match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) { + return Def->replaceAllUsesWith( + Builder.createNaryOp(VPInstruction::ExtractLastElement, {A})); + } } void VPlanTransforms::simplifyRecipes(VPlan &Plan) { @@ -1322,8 +1333,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/); Clone->insertBefore(RepOrWidenR); - auto *Ext = new VPInstruction(VPInstruction::ExtractLastElement, - {Clone->getOperand(0)}); + unsigned ExtractOpc = + vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1)) + ? VPInstruction::ExtractLastElement + : VPInstruction::ExtractLastLanePerPart; + auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)}); Ext->insertBefore(Clone); Clone->setOperand(0, Ext); RepR->eraseFromParent(); @@ -1337,7 +1351,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) { return U->usesScalars(RepOrWidenR) || match(cast<VPRecipeBase>(U), - m_ExtractLastElement(m_VPValue())); + m_CombineOr(m_ExtractLastElement(m_VPValue()), + m_ExtractLastLanePerPart(m_VPValue()))); })) continue; @@ -1530,7 +1545,7 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan, return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE); }); - auto *CanIV = Plan.getCanonicalIV(); + auto *CanIV = Plan.getVectorLoopRegion()->getCanonicalIV(); if (!match(Cond, m_SpecificICmp(CmpInst::ICMP_EQ, m_Specific(CanIV->getBackedgeValue()), m_Specific(&Plan.getVectorTripCount())))) @@ -2107,9 +2122,18 @@ static void licm(VPlan &Plan) { VPBasicBlock *Preheader = Plan.getVectorPreheader(); // Return true if we do not know how to (mechanically) hoist a given recipe - // out of a loop region. Does not address legality concerns such as aliasing - // or speculation safety. + // out of a loop region. auto CannotHoistRecipe = [](VPRecipeBase &R) { + // Assumes don't alias anything or throw; as long as they're guaranteed to + // execute, they're safe to hoist. + if (match(&R, m_Intrinsic<Intrinsic::assume>())) + return false; + + // TODO: Relax checks in the future, e.g. we could also hoist reads, if + // their memory location is not modified in the vector loop. + if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi()) + return true; + // Allocas cannot be hoisted. auto *RepR = dyn_cast<VPReplicateRecipe>(&R); return RepR && RepR->getOpcode() == Instruction::Alloca; @@ -2117,17 +2141,18 @@ static void licm(VPlan &Plan) { // Hoist any loop invariant recipes from the vector loop region to the // preheader. Preform a shallow traversal of the vector loop region, to - // exclude recipes in replicate regions. + // exclude recipes in replicate regions. Since the top-level blocks in the + // vector loop region are guaranteed to execute if the vector pre-header is, + // we don't need to check speculation safety. VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + assert(Preheader->getSingleSuccessor() == LoopRegion && + "Expected vector prehader's successor to be the vector loop region"); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( vp_depth_first_shallow(LoopRegion->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (CannotHoistRecipe(R)) continue; - // TODO: Relax checks in the future, e.g. we could also hoist reads, if - // their memory location is not modified in the vector loop. - if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() || - any_of(R.operands(), [](VPValue *Op) { + if (any_of(R.operands(), [](VPValue *Op) { return !Op->isDefinedOutsideLoopRegions(); })) continue; @@ -2319,7 +2344,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) { VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *EB = TopRegion->getExitingBasicBlock(); - auto *CanonicalIVPHI = Plan.getCanonicalIV(); + auto *CanonicalIVPHI = TopRegion->getCanonicalIV(); VPValue *StartV = CanonicalIVPHI->getStartValue(); auto *CanonicalIVIncrement = @@ -2358,7 +2383,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( // Create the active lane mask instruction in the VPlan preheader. VPValue *ALMMultiplier = Plan.getOrAddLiveIn( - ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); + ConstantInt::get(TopRegion->getCanonicalIV()->getScalarType(), 1)); auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC, ALMMultiplier}, DL, "active.lane.mask.entry"); @@ -2394,13 +2419,15 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( /// TODO: Introduce explicit recipe for header-mask instead of searching /// for the header-mask pattern manually. static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) { + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); SmallVector<VPValue *> WideCanonicalIVs; - auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(), - IsaPred<VPWidenCanonicalIVRecipe>); - assert(count_if(Plan.getCanonicalIV()->users(), + auto *FoundWidenCanonicalIVUser = find_if( + LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>); + assert(count_if(LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>) <= 1 && "Must have at most one VPWideCanonicalIVRecipe"); - if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) { + if (FoundWidenCanonicalIVUser != + LoopRegion->getCanonicalIV()->users().end()) { auto *WideCanonicalIV = cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser); WideCanonicalIVs.push_back(WideCanonicalIV); @@ -2408,7 +2435,7 @@ static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) { // Also include VPWidenIntOrFpInductionRecipes that represent a widened // version of the canonical induction. - VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock(); for (VPRecipeBase &Phi : HeaderVPBB->phis()) { auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); if (WidenOriginalIV && WidenOriginalIV->isCanonical()) @@ -2441,8 +2468,9 @@ void VPlanTransforms::addActiveLaneMask( "DataAndControlFlowWithoutRuntimeCheck implies " "UseActiveLaneMaskForControlFlow"); - auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(), - IsaPred<VPWidenCanonicalIVRecipe>); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + auto *FoundWidenCanonicalIVUser = find_if( + LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>); assert(FoundWidenCanonicalIVUser && "Must have widened canonical IV when tail folding!"); VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan); @@ -2455,7 +2483,7 @@ void VPlanTransforms::addActiveLaneMask( } else { VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV); VPValue *ALMMultiplier = Plan.getOrAddLiveIn( - ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); + ConstantInt::get(LoopRegion->getCanonicalIV()->getScalarType(), 1)); LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask, {WideCanonicalIV, Plan.getTripCount(), ALMMultiplier}, @@ -2565,9 +2593,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { }); assert(all_of(Plan.getVFxUF().users(), - [&Plan](VPUser *U) { - return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()), - m_Specific(&Plan.getVFxUF()))) || + [&LoopRegion, &Plan](VPUser *U) { + return match(U, + m_c_Add(m_Specific(LoopRegion->getCanonicalIV()), + m_Specific(&Plan.getVFxUF()))) || isa<VPWidenPointerInductionRecipe>(U); }) && "Only users of VFxUF should be VPWidenPointerInductionRecipe and the " @@ -2722,9 +2751,10 @@ void VPlanTransforms::addExplicitVectorLength( VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) { if (Plan.hasScalarVFOnly()) return; - VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); - auto *CanonicalIVPHI = Plan.getCanonicalIV(); + auto *CanonicalIVPHI = LoopRegion->getCanonicalIV(); auto *CanIVTy = CanonicalIVPHI->getScalarType(); VPValue *StartV = CanonicalIVPHI->getStartValue(); @@ -4164,7 +4194,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, // Adjust induction to reflect that the transformed plan only processes one // original iteration. - auto *CanIV = Plan.getCanonicalIV(); + auto *CanIV = VectorLoop->getCanonicalIV(); auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue()); VPBuilder PHBuilder(Plan.getVectorPreheader()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 1c4adfc..5aeda3e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -69,7 +69,8 @@ class UnrollState { VPBasicBlock::iterator InsertPtForPhi); VPValue *getConstantVPV(unsigned Part) { - Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType(); + Type *CanIVIntTy = + Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType(); return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part)); } @@ -351,8 +352,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) { // Compute*Result which combine all parts to compute the final value. VPValue *Op1; if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) || - match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>( - m_VPValue(Op1))) || + match(&R, m_FirstActiveLane(m_VPValue(Op1))) || match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>( m_VPValue(), m_VPValue(), m_VPValue(Op1))) || match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>( diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 66748c5..8b1b0e5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -53,7 +53,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) { return Expanded; } -bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) { +bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) { if (isa<VPActiveLaneMaskPHIRecipe>(V)) return true; @@ -67,12 +67,14 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) { if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One()))) return B == Plan.getTripCount() && - (match(A, m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()), m_One(), - m_Specific(&Plan.getVF()))) || + (match(A, + m_ScalarIVSteps( + m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()), + m_One(), m_Specific(&Plan.getVF()))) || IsWideCanonicalIV(A)); return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) && - B == Plan.getOrCreateBackedgeTakenCount(); + B == Plan.getBackedgeTakenCount(); } const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) { @@ -102,7 +104,8 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) { return all_of(R->operands(), isUniformAcrossVFsAndUFs); } - auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV(); + auto *CanonicalIV = + R->getParent()->getEnclosingLoopRegion()->getCanonicalIV(); // Canonical IV chain is uniform. if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue()) return true; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 0222b0a..cf95ac0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -90,7 +90,7 @@ inline bool isSingleScalar(const VPValue *VPV) { } /// Return true if \p V is a header mask in \p Plan. -bool isHeaderMask(const VPValue *V, VPlan &Plan); +bool isHeaderMask(const VPValue *V, const VPlan &Plan); /// Checks if \p V is uniform across all VF lanes and UF parts. It is considered /// as such if it is either loop invariant (defined outside the vector region) diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 5262af6..91734a1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -298,11 +298,16 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { return false; } } - if (const auto *EVL = dyn_cast<VPInstruction>(&R)) { - if (EVL->getOpcode() == VPInstruction::ExplicitVectorLength && - !verifyEVLRecipe(*EVL)) { - errs() << "EVL VPValue is not used correctly\n"; - return false; + if (const auto *VPI = dyn_cast<VPInstruction>(&R)) { + switch (VPI->getOpcode()) { + case VPInstruction::ExplicitVectorLength: + if (!verifyEVLRecipe(*VPI)) { + errs() << "EVL VPValue is not used correctly\n"; + return false; + } + break; + default: + break; } } } |