Diffstat (limited to 'llvm/lib/Transforms')
24 files changed, 215 insertions, 181 deletions
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 83aa7de..28ee444 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -72,6 +72,7 @@ STATISTIC(NumImportedModules, "Number of modules imported from");
 STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
 STATISTIC(NumLiveSymbols, "Number of live symbols in index");
 
+namespace llvm {
 cl::opt<bool> ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
                              cl::desc("Import functions with noinline attribute"));
@@ -185,9 +186,8 @@ static cl::opt<bool> CtxprofMoveRootsToOwnModule(
 extern cl::list<GlobalValue::GUID> MoveSymbolGUID;
 
-namespace llvm {
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
-}
+} // end namespace llvm
 
 // Load lazily a module from \p FileName in \p Context.
 static std::unique_ptr<Module> loadFile(const std::string &FileName,
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 4f53738..150a2dc 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -28,10 +28,13 @@ using namespace llvm;
 
 STATISTIC(NumSpecsCreated, "Number of specializations created");
 
+namespace llvm {
+
 static cl::opt<bool> ForceSpecialization(
-    "force-specialization", cl::init(false), cl::Hidden, cl::desc(
-        "Force function specialization for every call site with a constant "
-        "argument"));
+    "force-specialization", cl::init(false), cl::Hidden,
+    cl::desc(
+        "Force function specialization for every call site with a constant "
+        "argument"));
 
 static cl::opt<unsigned> MaxClones(
     "funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
@@ -91,6 +94,8 @@ static cl::opt<bool> SpecializeLiteralConstant(
 
 extern cl::opt<bool> ProfcheckDisableMetadataFixes;
 
+} // end namespace llvm
+
 bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const {
   unsigned I = 0;
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 15f4d76..c4f1b68 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -214,11 +214,12 @@ static cl::opt<bool> MemProfRequireDefinitionForPromotion(
     "memprof-require-definition-for-promotion", cl::init(false), cl::Hidden,
     cl::desc(
         "Require target function definition when promoting indirect calls"));
-} // namespace llvm
 
 extern cl::opt<bool> MemProfReportHintedSizes;
 extern cl::opt<unsigned> MinClonedColdBytePercent;
 
+} // namespace llvm
+
 namespace {
 /// CRTP base for graphs built from either IR or ThinLTO summary index.
 ///
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 99b8b88..e39e311 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -116,6 +116,8 @@ STATISTIC(
     NumCSInlinedHitGrowthLimit,
     "Number of functions with FDO inline stopped due to growth size limit");
 
+namespace llvm {
+
 // Command line option to specify the file to read samples from. This is
 // mainly used for debugging.
 static cl::opt<std::string> SampleProfileFile(
@@ -198,7 +200,6 @@ static cl::opt<bool> DisableSampleLoaderInlining(
         "pass, and merge (or scale) profiles (as configured by "
         "--sample-profile-merge-inlinee)."));
 
-namespace llvm {
 cl::opt<bool> SortProfiledSCC("sort-profiled-scc-member", cl::init(true),
                               cl::Hidden,
                               cl::desc("Sort profiled recursion by edge weights."));
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 093a39e..70b8614 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -23,6 +23,8 @@ using namespace sampleprof;
 
 #define DEBUG_TYPE "sample-profile-matcher"
 
+namespace llvm {
+
 static cl::opt<unsigned> FuncProfileSimilarityThreshold(
     "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
     cl::desc("Consider a profile matches a function if the similarity of their "
@@ -55,6 +57,8 @@ static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
     cl::desc("The maximum number of callsites in a function, above which stale "
              "profile matching will be skipped."));
 
+} // end namespace llvm
+
 void SampleProfileMatcher::findIRAnchors(const Function &F,
                                          AnchorMap &IRAnchors) const {
   // For inlined code, recover the original callsite and callee by finding the
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 09bffa7..ac41fdd 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -120,6 +120,8 @@ STATISTIC(NumVirtConstProp1Bit,
           "Number of 1 bit virtual constant propagations");
 STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
 
+namespace llvm {
+
 static cl::opt<PassSummaryAction> ClSummaryAction(
     "wholeprogramdevirt-summary-action",
     cl::desc("What to do with the summary when running this pass"),
@@ -175,6 +177,8 @@ static cl::list<std::string>
 
 extern cl::opt<bool> ProfcheckDisableMetadataFixes;
 
+} // end namespace llvm
+
 /// With Clang, a pure virtual class's deleting destructor is emitted as a
 /// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
 /// context of whole program devirtualization, the deleting destructor of a pure
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b6b3a95..87000a1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2934,32 +2934,6 @@ static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
   return nullptr;
 }
 
-static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
-  FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());
-  if (!FI)
-    return nullptr;
-
-  Value *Cond = FI->getOperand(0);
-  Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();
-
-  // select (freeze(x == y)), x, y --> y
-  // select (freeze(x != y)), x, y --> x
-  // The freeze should be only used by this select. Otherwise, remaining uses of
-  // the freeze can observe a contradictory value.
-  // c = freeze(x == y)   ; Let's assume that y = poison & x = 42; c is 0 or 1
-  // a = select c, x, y   ;
-  // f(a, c)              ; f(poison, 1) cannot happen, but if a is folded
-  //                      ; to y, this can happen.
-  CmpPredicate Pred;
-  if (FI->hasOneUse() &&
-      match(Cond, m_c_ICmp(Pred, m_Specific(TrueVal), m_Specific(FalseVal))) &&
-      (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)) {
-    return Pred == ICmpInst::ICMP_EQ ? FalseVal : TrueVal;
-  }
-
-  return nullptr;
-}
-
 /// Given that \p CondVal is known to be \p CondIsTrue, try to simplify \p SI.
 static Value *simplifyNestedSelectsUsingImpliedCond(SelectInst &SI,
                                                     Value *CondVal,
@@ -4446,9 +4420,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
   if (Instruction *PN = foldSelectToPhi(SI, DT, Builder))
     return replaceInstUsesWith(SI, PN);
 
-  if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder))
-    return replaceInstUsesWith(SI, Fr);
-
   if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder))
     return replaceInstUsesWith(SI, V);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 6ef3066..18a45c6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -319,20 +319,20 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
   return nullptr;
 }
 
-/// Find elements of V demanded by UserInstr.
-static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
+/// Find elements of V demanded by UserInstr. If returns false, we were not able
+/// to determine all elements.
+static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
+                                         APInt &UnionUsedElts) {
   unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
 
-  // Conservatively assume that all elements are needed.
-  APInt UsedElts(APInt::getAllOnes(VWidth));
-
   switch (UserInstr->getOpcode()) {
   case Instruction::ExtractElement: {
     ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
     assert(EEI->getVectorOperand() == V);
     ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
     if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
-      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
+      UnionUsedElts.setBit(EEIIndexC->getZExtValue());
+      return true;
     }
     break;
   }
@@ -341,23 +341,23 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
     unsigned MaskNumElts =
         cast<FixedVectorType>(UserInstr->getType())->getNumElements();
-    UsedElts = APInt(VWidth, 0);
-    for (unsigned i = 0; i < MaskNumElts; i++) {
-      unsigned MaskVal = Shuffle->getMaskValue(i);
+    for (auto I : llvm::seq(MaskNumElts)) {
+      unsigned MaskVal = Shuffle->getMaskValue(I);
       if (MaskVal == -1u || MaskVal >= 2 * VWidth)
         continue;
       if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
-        UsedElts.setBit(MaskVal);
+        UnionUsedElts.setBit(MaskVal);
       if (Shuffle->getOperand(1) == V &&
           ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
-        UsedElts.setBit(MaskVal - VWidth);
+        UnionUsedElts.setBit(MaskVal - VWidth);
     }
-    break;
+    return true;
   }
   default:
     break;
   }
-  return UsedElts;
+
+  return false;
 }
 
 /// Find union of elements of V demanded by all its users.
@@ -370,7 +370,8 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
   APInt UnionUsedElts(VWidth, 0);
   for (const Use &U : V->uses()) {
     if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
-      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
+      if (!findDemandedEltsBySingleUser(V, I, UnionUsedElts))
+        return APInt::getAllOnes(VWidth);
     } else {
       UnionUsedElts = APInt::getAllOnes(VWidth);
       break;
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5d2d79e..917004c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -132,9 +132,11 @@ STATISTIC(NumReassoc , "Number of reassociations");
 DEBUG_COUNTER(VisitCounter, "instcombine-visit",
               "Controls which instructions are visited");
 
-static cl::opt<bool>
-EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
-                  cl::init(true));
+namespace llvm {
+
+static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
+                                       cl::desc("Enable code sinking"),
+                                       cl::init(true));
 
 static cl::opt<unsigned> MaxSinkNumUsers(
     "instcombine-max-sink-users", cl::init(32),
@@ -156,6 +158,8 @@ extern cl::opt<bool> ProfcheckDisableMetadataFixes;
 static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
                                                cl::Hidden, cl::init(true));
 
+} // end namespace llvm
+
 std::optional<Instruction *>
 InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
   // Handle target specific intrinsics
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 0249f21..cf87e35 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -55,11 +55,11 @@ using namespace llvm;
 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
 
+namespace llvm {
 extern cl::opt<unsigned> MaxNumVTableAnnotations;
 
-namespace llvm {
 extern cl::opt<bool> EnableVTableProfileUse;
-}
+} // namespace llvm
 
 // Command line option to disable indirect-call promotion with the default as
 // false. This is for debug purpose.
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index cf076b9a..eff6f0c 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1923,20 +1923,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   ///
   /// Shadow = ParamTLS+ArgOffset.
   Value *getShadowPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
-    Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
-    if (ArgOffset)
-      Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
-    return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg");
+    return IRB.CreatePtrAdd(MS.ParamTLS,
+                            ConstantInt::get(MS.IntptrTy, ArgOffset), "_msarg");
   }
 
   /// Compute the origin address for a given function argument.
   Value *getOriginPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
     if (!MS.TrackOrigins)
       return nullptr;
-    Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
-    if (ArgOffset)
-      Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
-    return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg_o");
+    return IRB.CreatePtrAdd(MS.ParamOriginTLS,
+                            ConstantInt::get(MS.IntptrTy, ArgOffset),
+                            "_msarg_o");
   }
 
   /// Compute the shadow address for a retval.
@@ -7219,9 +7216,8 @@ struct VarArgHelperBase : public VarArgHelper {
   /// Compute the shadow address for a given va_arg.
   Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
-    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
-    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
-    return IRB.CreateIntToPtr(Base, MS.PtrTy, "_msarg_va_s");
+    return IRB.CreatePtrAdd(
+        MS.VAArgTLS, ConstantInt::get(MS.IntptrTy, ArgOffset), "_msarg_va_s");
   }
 
   /// Compute the shadow address for a given va_arg.
@@ -7235,12 +7231,12 @@ struct VarArgHelperBase : public VarArgHelper {
 
   /// Compute the origin address for a given va_arg.
   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
-    Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
     // getOriginPtrForVAArgument() is always called after
     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
     // overflow.
-    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
-    return IRB.CreateIntToPtr(Base, MS.PtrTy, "_msarg_va_o");
+    return IRB.CreatePtrAdd(MS.VAArgOriginTLS,
+                            ConstantInt::get(MS.IntptrTy, ArgOffset),
+                            "_msarg_va_o");
   }
 
   void CleanUnusedTLS(IRBuilder<> &IRB, Value *ShadowBase,
@@ -7467,10 +7463,8 @@ struct VarArgAMD64Helper : public VarArgHelperBase {
     NextNodeIRBuilder IRB(OrigInst);
     Value *VAListTag = OrigInst->getArgOperand(0);
-    Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
-        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
-                      ConstantInt::get(MS.IntptrTy, 16)),
-        MS.PtrTy);
+    Value *RegSaveAreaPtrPtr =
+        IRB.CreatePtrAdd(VAListTag, ConstantInt::get(MS.IntptrTy, 16));
     Value *RegSaveAreaPtr = IRB.CreateLoad(MS.PtrTy, RegSaveAreaPtrPtr);
     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
     const Align Alignment = Align(16);
@@ -7482,10 +7476,8 @@ struct VarArgAMD64Helper : public VarArgHelperBase {
     if (MS.TrackOrigins)
       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
                        Alignment, AMD64FpEndOffset);
-    Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
-        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
-                      ConstantInt::get(MS.IntptrTy, 8)),
-        MS.PtrTy);
+    Value *OverflowArgAreaPtrPtr =
+        IRB.CreatePtrAdd(VAListTag, ConstantInt::get(MS.IntptrTy, 8));
     Value *OverflowArgAreaPtr = IRB.CreateLoad(MS.PtrTy, OverflowArgAreaPtrPtr);
     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
@@ -7615,19 +7607,15 @@ struct VarArgAArch64Helper : public VarArgHelperBase {
 
   // Retrieve a va_list field of 'void*' size.
   Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
-    Value *SaveAreaPtrPtr = IRB.CreateIntToPtr(
-        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
-                      ConstantInt::get(MS.IntptrTy, offset)),
-        MS.PtrTy);
+    Value *SaveAreaPtrPtr =
+        IRB.CreatePtrAdd(VAListTag, ConstantInt::get(MS.IntptrTy, offset));
     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
   }
 
   // Retrieve a va_list field of 'int' size.
   Value *getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
-    Value *SaveAreaPtr = IRB.CreateIntToPtr(
-        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
-                      ConstantInt::get(MS.IntptrTy, offset)),
-        MS.PtrTy);
+    Value *SaveAreaPtr =
+        IRB.CreatePtrAdd(VAListTag, ConstantInt::get(MS.IntptrTy, offset));
     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
   }
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index d9e850e..120c4f6 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -222,7 +222,6 @@ cl::opt<bool> NoPGOWarnMismatchComdatWeak(
     cl::desc("The option is used to turn on/off "
              "warnings about hash mismatch for comdat "
             "or weak functions."));
-} // namespace llvm
 
 // Command line option to enable/disable select instruction instrumentation.
 static cl::opt<bool>
@@ -347,7 +346,6 @@ cl::list<std::string> CtxPGOSkipCallsiteInstrument(
 
 extern cl::opt<unsigned> MaxNumVTableAnnotations;
 
-namespace llvm {
 // Command line option to turn on CFG dot dump after profile annotation.
 // Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts
 extern cl::opt<PGOViewCountsType> PGOViewCounts;
diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 343bec3..a5f417a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -54,6 +54,8 @@ using namespace llvm;
 STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
 STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
 
+namespace llvm {
+
 // The minimum call count to optimize memory intrinsic calls.
 static cl::opt<unsigned>
     MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000),
@@ -93,6 +95,8 @@ static cl::opt<unsigned>
     MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
                     cl::desc("Optimize the memop size <= this value"));
 
+} // end namespace llvm
+
 namespace {
 
 static const char *getMIName(const MemIntrinsic *MI) {
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
index a3d4e53..0534fdd 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -21,7 +21,9 @@ using namespace llvm;
 using CandidateInfo = ValueProfileCollector::CandidateInfo;
 
+namespace llvm {
 extern cl::opt<bool> MemOPOptMemcmpBcmp;
+} // end namespace llvm
 
 ///--------------------------- MemIntrinsicPlugin ------------------------------
 class MemIntrinsicPlugin : public InstVisitor<MemIntrinsicPlugin> {
diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
index 2025fbb..3c14036e 100644
--- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
@@ -26,6 +26,8 @@
 
 using namespace llvm;
 
+namespace llvm {
+
 static cl::opt<unsigned>
     JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden,
                            cl::desc("Only split jump tables with size less or "
@@ -43,6 +45,8 @@ static cl::opt<unsigned> FunctionSizeThreshold(
 
 extern cl::opt<bool> ProfcheckDisableMetadataFixes;
 
+} // end namespace llvm
+
 #define DEBUG_TYPE "jump-table-to-switch"
 
 namespace {
@@ -201,14 +205,12 @@ PreservedAnalyses JumpTableToSwitchPass::run(Function &F,
   PostDominatorTree *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
   DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
   bool Changed = false;
-  InstrProfSymtab Symtab;
-  if (auto E = Symtab.create(*F.getParent()))
-    F.getContext().emitError(
-        "Could not create indirect call table, likely corrupted IR" +
-        toString(std::move(E)));
-  DenseMap<const Function *, GlobalValue::GUID> FToGuid;
-  for (const auto &[G, FPtr] : Symtab.getIDToNameMap())
-    FToGuid.insert({FPtr, G});
+  auto FuncToGuid = [&](const Function &Fct) {
+    if (Fct.getMetadata(AssignGUIDPass::GUIDMetadataName))
+      return AssignGUIDPass::getGUID(Fct);
+
+    return Function::getGUIDAssumingExternalLinkage(getIRPGOFuncName(F, InLTO));
+  };
 
   for (BasicBlock &BB : make_early_inc_range(F)) {
     BasicBlock *CurrentBB = &BB;
@@ -230,12 +232,8 @@ PreservedAnalyses JumpTableToSwitchPass::run(Function &F,
       std::optional<JumpTableTy> JumpTable = parseJumpTable(GEP, PtrTy);
       if (!JumpTable)
         continue;
-      SplittedOutTail = expandToSwitch(
-          Call, *JumpTable, DTU, ORE, [&](const Function &Fct) {
-            if (Fct.getMetadata(AssignGUIDPass::GUIDMetadataName))
-              return AssignGUIDPass::getGUID(Fct);
-            return FToGuid.lookup_or(&Fct, 0U);
-          });
+      SplittedOutTail =
+          expandToSwitch(Call, *JumpTable, DTU, ORE, FuncToGuid);
       Changed = true;
       break;
     }
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index bab1f2a..9655173 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -116,6 +116,8 @@ STATISTIC(NumIntAssociationsHoisted,
 STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
                                     "reassociated and hoisted out of the loop");
 
+namespace llvm {
+
 /// Memory promotion is enabled by default.
 static cl::opt<bool>
     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
@@ -154,7 +156,7 @@ static cl::opt<unsigned> IntAssociationUpperLimit(
 // which may not be precise, since optimizeUses is capped. The result is
 // correct, but we may not get as "far up" as possible to get which access is
 // clobbering the one queried.
-cl::opt<unsigned> llvm::SetLicmMssaOptCap(
+cl::opt<unsigned> SetLicmMssaOptCap(
     "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
              "for faster compile. Caps the MemorySSA clobbering calls."));
@@ -162,7 +164,7 @@ cl::opt<unsigned> llvm::SetLicmMssaOptCap(
 // Experimentally, memory promotion carries less importance than sinking and
 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
 // compile time.
-cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
+cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap(
     "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
     cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
              "effect. When MSSA in LICM is enabled, then this is the maximum "
@@ -171,6 +173,8 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
 
 extern cl::opt<bool> ProfcheckDisableMetadataFixes;
 
+} // end namespace llvm
+
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
 static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
                                       const LoopSafetyInfo *SafetyInfo,
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 1a9e16b..d31154f 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -17,6 +17,8 @@
 
 using namespace llvm;
 
+namespace llvm {
+
 /// Uses the "source_filename" instead of a Module hash ID for the suffix of
 /// promoted locals during LTO. NOTE: This requires that the source filename
 /// has a unique name / path to avoid name collisions.
@@ -35,6 +37,8 @@ cl::list<GlobalValue::GUID> MoveSymbolGUID(
              "used with the name of contextual profiling roots."),
     cl::Hidden);
 
+} // end namespace llvm
+
 FunctionImportGlobalProcessing::FunctionImportGlobalProcessing(
     Module &M, const ModuleSummaryIndex &Index,
     SetVector<GlobalValue *> *GlobalsToImport, bool ClearDSOLocalOnDeclarations)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 4d1f768..8bba634 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -95,7 +95,9 @@ using namespace PatternMatch;
 
 #define DEBUG_TYPE "simplifycfg"
 
-cl::opt<bool> llvm::RequireAndPreserveDomTree(
+namespace llvm {
+
+cl::opt<bool> RequireAndPreserveDomTree(
     "simplifycfg-require-and-preserve-domtree", cl::Hidden,
 
     cl::desc(
@@ -205,6 +207,8 @@ static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(
 
 extern cl::opt<bool> ProfcheckDisableMetadataFixes;
 
+} // end namespace llvm
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps,
           "Number of switch instructions turned into linear mapping");
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 12fb46d..7fa787b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5699,6 +5699,20 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
         Worklist.push_back(InstOp);
     }
 
+  auto UpdateMemOpUserCost = [this, VF](LoadInst *LI) {
+    // If there are direct memory op users of the newly scalarized load,
+    // their cost may have changed because there's no scalarization
+    // overhead for the operand. Update it.
+    for (User *U : LI->users()) {
+      if (!isa<LoadInst, StoreInst>(U))
+        continue;
+      if (getWideningDecision(cast<Instruction>(U), VF) != CM_Scalarize)
+        continue;
+      setWideningDecision(
+          cast<Instruction>(U), VF, CM_Scalarize,
+          getMemInstScalarizationCost(cast<Instruction>(U), VF));
+    }
+  };
   for (auto *I : AddrDefs) {
     if (isa<LoadInst>(I)) {
       // Setting the desired widening decision should ideally be handled in
@@ -5708,21 +5722,24 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
       InstWidening Decision = getWideningDecision(I, VF);
       if (Decision == CM_Widen || Decision == CM_Widen_Reverse ||
           (!isPredicatedInst(I) && !Legal->isUniformMemOp(*I, VF) &&
-           Decision == CM_Scalarize))
+           Decision == CM_Scalarize)) {
         // Scalarize a widened load of address or update the cost of a scalar
         // load of an address.
         setWideningDecision(
             I, VF, CM_Scalarize,
            (VF.getKnownMinValue() *
             getMemoryInstructionCost(I, ElementCount::getFixed(1))));
-      else if (const auto *Group = getInterleavedAccessGroup(I)) {
+        UpdateMemOpUserCost(cast<LoadInst>(I));
+      } else if (const auto *Group = getInterleavedAccessGroup(I)) {
         // Scalarize an interleave group of address loads.
         for (unsigned I = 0; I < Group->getFactor(); ++I) {
-          if (Instruction *Member = Group->getMember(I))
+          if (Instruction *Member = Group->getMember(I)) {
             setWideningDecision(
                 Member, VF, CM_Scalarize,
                 (VF.getKnownMinValue() *
                  getMemoryInstructionCost(Member, ElementCount::getFixed(1))));
+            UpdateMemOpUserCost(cast<LoadInst>(Member));
+          }
         }
       }
     } else {
@@ -7937,6 +7954,13 @@ bool VPRecipeBuilder::getScaledReductions(
   auto CollectExtInfo = [this, &Exts, &ExtOpTypes,
                          &ExtKinds](SmallVectorImpl<Value *> &Ops) -> bool {
     for (const auto &[I, OpI] : enumerate(Ops)) {
+      auto *CI = dyn_cast<ConstantInt>(OpI);
+      if (I > 0 && CI &&
+          canConstantBeExtended(CI, ExtOpTypes[0], ExtKinds[0])) {
+        ExtOpTypes[I] = ExtOpTypes[0];
+        ExtKinds[I] = ExtKinds[0];
+        continue;
+      }
       Value *ExtOp;
       if (!match(OpI, m_ZExtOrSExt(m_Value(ExtOp))))
         return false;
@@ -9521,55 +9545,52 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
   VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
   Header->setName("vec.epilog.vector.body");
 
-  DenseMap<Value *, Value *> ToFrozen;
-  SmallVector<Instruction *> InstsToMove;
   // Ensure that the start values for all header phi recipes are updated before
   // vectorizing the epilogue loop.
-  for (VPRecipeBase &R : Header->phis()) {
-    if (auto *IV = dyn_cast<VPCanonicalIVPHIRecipe>(&R)) {
-      // When vectorizing the epilogue loop, the canonical induction start
-      // value needs to be changed from zero to the value after the main
-      // vector loop. Find the resume value created during execution of the main
-      // VPlan. It must be the first phi in the loop preheader.
-      // FIXME: Improve modeling for canonical IV start values in the epilogue
-      // loop.
-      using namespace llvm::PatternMatch;
-      PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
-      for (Value *Inc : EPResumeVal->incoming_values()) {
-        if (match(Inc, m_SpecificInt(0)))
-          continue;
-        assert(!EPI.VectorTripCount &&
-               "Must only have a single non-zero incoming value");
-        EPI.VectorTripCount = Inc;
-      }
-      // If we didn't find a non-zero vector trip count, all incoming values
-      // must be zero, which also means the vector trip count is zero. Pick the
-      // first zero as vector trip count.
-      // TODO: We should not choose VF * UF so the main vector loop is known to
-      // be dead.
-      if (!EPI.VectorTripCount) {
-        assert(
-            EPResumeVal->getNumIncomingValues() > 0 &&
-            all_of(EPResumeVal->incoming_values(),
-                   [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
-            "all incoming values must be 0");
-        EPI.VectorTripCount = EPResumeVal->getOperand(0);
-      }
-      VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
-      assert(all_of(IV->users(),
-                    [](const VPUser *U) {
-                      return isa<VPScalarIVStepsRecipe>(U) ||
-                             isa<VPDerivedIVRecipe>(U) ||
-                             cast<VPRecipeBase>(U)->isScalarCast() ||
-                             cast<VPInstruction>(U)->getOpcode() ==
-                                 Instruction::Add;
-                    }) &&
-             "the canonical IV should only be used by its increment or "
-             "ScalarIVSteps when resetting the start value");
-      IV->setOperand(0, VPV);
+  VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+  // When vectorizing the epilogue loop, the canonical induction start
+  // value needs to be changed from zero to the value after the main
+  // vector loop. Find the resume value created during execution of the main
+  // VPlan. It must be the first phi in the loop preheader.
+  // FIXME: Improve modeling for canonical IV start values in the epilogue
+  // loop.
+  using namespace llvm::PatternMatch;
+  PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
+  for (Value *Inc : EPResumeVal->incoming_values()) {
+    if (match(Inc, m_SpecificInt(0)))
+      continue;
+    assert(!EPI.VectorTripCount &&
+           "Must only have a single non-zero incoming value");
+    EPI.VectorTripCount = Inc;
+  }
+  // If we didn't find a non-zero vector trip count, all incoming values
+  // must be zero, which also means the vector trip count is zero. Pick the
+  // first zero as vector trip count.
+  // TODO: We should not choose VF * UF so the main vector loop is known to
+  // be dead.
+  if (!EPI.VectorTripCount) {
+    assert(EPResumeVal->getNumIncomingValues() > 0 &&
+           all_of(EPResumeVal->incoming_values(),
+                  [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
+           "all incoming values must be 0");
+    EPI.VectorTripCount = EPResumeVal->getOperand(0);
+  }
+  VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
+  assert(all_of(IV->users(),
+                [](const VPUser *U) {
+                  return isa<VPScalarIVStepsRecipe>(U) ||
+                         isa<VPDerivedIVRecipe>(U) ||
+                         cast<VPRecipeBase>(U)->isScalarCast() ||
+                         cast<VPInstruction>(U)->getOpcode() ==
+                             Instruction::Add;
+                }) &&
+         "the canonical IV should only be used by its increment or "
+         "ScalarIVSteps when resetting the start value");
+  IV->setOperand(0, VPV);
+  DenseMap<Value *, Value *> ToFrozen;
+  SmallVector<Instruction *> InstsToMove;
+  for (VPRecipeBase &R : drop_begin(Header->phis())) {
     Value *ResumeV = nullptr;
     // TODO: Move setting of resume values to prepareToExecute.
     if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f77d587..fedca65 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2241,10 +2241,9 @@ public:
   /// TODO: If load combining is allowed in the IR optimizer, this analysis
   /// may not be necessary.
   bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
-  bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                     ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
-                     const DataLayout &DL, ScalarEvolution &SE,
-                     const int64_t Diff, StridedPtrInfo &SPtrInfo) const;
+  bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                     Align Alignment, const int64_t Diff, Value *Ptr0,
+                     Value *PtrN, StridedPtrInfo &SPtrInfo) const;
 
   /// Checks if the given array of loads can be represented as a vectorized,
   /// scatter or just simple gather.
@@ -6824,13 +6823,10 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
 /// 4. Any pointer operand is an instruction with the users outside of the
 ///    current graph (for masked gathers extra extractelement instructions
 ///    might be required).
-bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
-                            ArrayRef<unsigned> Order,
-                            const TargetTransformInfo &TTI,
-                            const DataLayout &DL, ScalarEvolution &SE,
-                            const int64_t Diff,
-                            StridedPtrInfo &SPtrInfo) const {
-  const size_t Sz = VL.size();
+bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+                            Align Alignment, const int64_t Diff, Value *Ptr0,
+                            Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+  const size_t Sz = PointerOps.size();
 
   if (Diff % (Sz - 1) != 0)
     return false;
@@ -6842,7 +6838,6 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   });
 
   const uint64_t AbsoluteDiff = std::abs(Diff);
-  Type *ScalarTy = VL.front()->getType();
   auto *VecTy = getWidenedType(ScalarTy, Sz);
   if (IsAnyPointerUsedOutGraph ||
       (AbsoluteDiff > Sz &&
@@ -6853,20 +6848,9 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
   int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
   if (Diff != Stride * static_cast<int64_t>(Sz - 1))
     return false;
-  Align Alignment =
-      cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
-          ->getAlign();
-  if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+  if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
     return false;
-  Value *Ptr0;
-  Value *PtrN;
-  if (Order.empty()) {
-    Ptr0 = PointerOps.front();
-    PtrN = PointerOps.back();
-  } else {
-    Ptr0 = PointerOps[Order.front()];
-    PtrN = PointerOps[Order.back()];
-  }
+
   // Iterate through all pointers and check if all distances are
   // unique multiple of Dist.
   SmallSet<int64_t, 4> Dists;
@@ -6875,14 +6859,14 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
     if (Ptr == PtrN)
       Dist = Diff;
     else if (Ptr != Ptr0)
-      Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+      Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
     // If the strides are not the same or repeated, we can't
     // vectorize.
     if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
       break;
   }
   if (Dists.size() == Sz) {
-    Type *StrideTy = DL.getIndexType(Ptr0->getType());
+    Type *StrideTy = DL->getIndexType(Ptr0->getType());
     SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
     SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
     return true;
@@ -6971,7 +6955,11 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
                              cast<Instruction>(V), UserIgnoreList);
         }))
       return LoadsState::CompressVectorize;
-    if (isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE, *Diff, SPtrInfo))
+    Align Alignment =
+        cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
+            ->getAlign();
+    if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
+                      SPtrInfo))
       return LoadsState::StridedVectorize;
   }
   if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index ffd2e59..07b191a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -978,6 +978,16 @@ void VPlan::execute(VPTransformState *State) {
 
   // If the original loop is unreachable, delete it and all its blocks.
   if (!ScalarPhVPBB->hasPredecessors()) {
+    // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
+    // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
+    // IR instructions.
+    for (VPIRBasicBlock *EB : getExitBlocks()) {
+      for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
+        if (R.getNumOperands() == 1)
+          R.eraseFromParent();
+      }
+    }
+
     Loop *OrigLoop =
         State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
     auto Blocks = OrigLoop->getBlocksVector();
@@ -1743,6 +1753,16 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
 }
 #endif
 
+bool llvm::canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+                                 TTI::PartialReductionExtendKind ExtKind) {
+  APInt TruncatedVal = CI->getValue().trunc(NarrowType->getScalarSizeInBits());
+  unsigned WideSize = CI->getType()->getScalarSizeInBits();
+  APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
+                          ? TruncatedVal.sext(WideSize)
+                          : TruncatedVal.zext(WideSize);
+  return ExtendedVal == CI->getValue();
+}
+
 TargetTransformInfo::OperandValueInfo
 VPCostContext::getOperandInfo(VPValue *V) const {
   if (!V->isLiveIn())
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index fe59774..fc1a09e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -468,6 +468,10 @@ public:
 };
 #endif
 
+/// Check if a constant \p CI can be safely treated as having been extended
+/// from a narrower type with the given extension kind.
+bool canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+                           TTI::PartialReductionExtendKind ExtKind);
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 46909a5..67b9244 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -340,6 +340,14 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
                        : Widen->getOperand(1));
     ExtAType = GetExtendKind(ExtAR);
     ExtBType = GetExtendKind(ExtBR);
+
+    if (!ExtBR && Widen->getOperand(1)->isLiveIn()) {
+      auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue());
+      if (canConstantBeExtended(CI, InputTypeA, ExtAType)) {
+        InputTypeB = InputTypeA;
+        ExtBType = ExtAType;
+      }
+    }
   };
 
   if (isa<VPWidenCastRecipe>(OpR)) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a73b083..acdb379 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -40,7 +40,7 @@ using namespace llvm;
 
 using namespace VPlanPatternMatch;
 
-cl::opt<bool> EnableWideActiveLaneMask(
+static cl::opt<bool> EnableWideActiveLaneMask(
     "enable-wide-lane-mask", cl::init(false), cl::Hidden,
     cl::desc("Enable use of wide get active lane mask instructions"));