Diffstat (limited to 'llvm/lib/Transforms')
17 files changed, 223 insertions, 207 deletions
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index bbbac45..7a95df4 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -907,10 +907,20 @@ static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts, StoreInst *Store = Builder.CreateAlignedStore( Val, First.Store->getPointerOperand(), First.Store->getAlign()); + // Merge various metadata onto the new store. AAMDNodes AATags = First.Store->getAAMetadata(); - for (const PartStore &Part : drop_begin(Parts)) + SmallVector<Instruction *> Stores = {First.Store}; + Stores.reserve(Parts.size()); + SmallVector<DebugLoc> DbgLocs = {First.Store->getDebugLoc()}; + DbgLocs.reserve(Parts.size()); + for (const PartStore &Part : drop_begin(Parts)) { AATags = AATags.concat(Part.Store->getAAMetadata()); + Stores.push_back(Part.Store); + DbgLocs.push_back(Part.Store->getDebugLoc()); + } Store->setAAMetadata(AATags); + Store->mergeDIAssignID(Stores); + Store->setDebugLoc(DebugLoc::getMergedLocations(DbgLocs)); // Remove the old stores. for (const PartStore &Part : Parts) diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 46fb567..aa1346d 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1271,7 +1271,7 @@ bool LowerTypeTestsModule::hasBranchTargetEnforcement() { // the module flags. if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( M.getModuleFlag("branch-target-enforcement"))) - HasBranchTargetEnforcement = (BTE->getZExtValue() != 0); + HasBranchTargetEnforcement = !BTE->isZero(); else HasBranchTargetEnforcement = 0; } diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index 76e588b..a0f7ec6 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -24,7 +24,8 @@ // returns 0, or a single vtable's function returns 1, replace each virtual // call with a comparison of the vptr against that vtable's address. // -// This pass is intended to be used during the regular and thin LTO pipelines: +// This pass is intended to be used during the regular LTO, ThinLTO and +// non-LTO pipelines: // // During regular LTO, the pass determines the best optimization for each // virtual call and applies the resolutions directly to virtual calls that are @@ -48,6 +49,14 @@ // is supported. // - Import phase: (same as with hybrid case above). // +// During speculative devirtualization mode (not restricted to LTO): +// - The pass applies speculative devirtualization without requiring any +// particular vtable visibility. +// - Skips other features such as virtual constant propagation, uniform return +// value optimization, unique return value optimization and branch funnels, as +// they require LTO. +// - This mode is enabled via the 'devirtualize-speculatively' flag.
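A minimal C++ sketch of the guard-and-fallback idea behind this mode (illustrative only; the free-function model and all names here are assumptions, not code from the pass):

#include <cstdio>

static void target(int x) { std::printf("direct: %d\n", x); }  // assumed single candidate
static void other(int x) { std::printf("indirect: %d\n", x); } // some other override

// Stand-in for a versioned virtual call site: compare the loaded function
// pointer against the speculated target; on mismatch, fall back to the
// original indirect call, so correctness never depends on the guess.
static void call(void (*fn)(int), int x) {
  if (fn == &target) // the comparison the pass inserts (likely branch)
    target(x);       // direct call, now visible to the inliner
  else
    fn(x);           // indirect fallback
}

int main() {
  call(&target, 1); // takes the devirtualized fast path
  call(&other, 2);  // mismatch: falls back to the indirect call
}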
+// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -61,7 +70,9 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" @@ -145,6 +156,13 @@ static cl::opt<std::string> ClWriteSummary( "bitcode, otherwise YAML"), cl::Hidden); +// TODO: This option should eventually support any public-visibility vtables, +// with or without LTO. +static cl::opt<bool> ClDevirtualizeSpeculatively( "devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false)); + static cl::opt<unsigned> ClThreshold("wholeprogramdevirt-branch-funnel-threshold", cl::Hidden, cl::init(10), @@ -892,6 +910,8 @@ void llvm::updatePublicTypeTestCalls(Module &M, CI->eraseFromParent(); } } else { + // TODO: Don't replace public type tests when speculative devirtualization + // is enabled in LTO mode. auto *True = ConstantInt::getTrue(M.getContext()); for (Use &U : make_early_inc_range(PublicTypeTestFunc->uses())) { auto *CI = cast<CallInst>(U.getUser()); @@ -1083,10 +1103,10 @@ bool DevirtModule::tryFindVirtualCallTargets( if (!TM.Bits->GV->isConstant()) return false; - // We cannot perform whole program devirtualization analysis on a vtable - // with public LTO visibility. - if (TM.Bits->GV->getVCallVisibility() == - GlobalObject::VCallVisibilityPublic) + // Without ClDevirtualizeSpeculatively, we cannot perform whole program + // devirtualization analysis on a vtable with public LTO visibility. + if (!ClDevirtualizeSpeculatively && TM.Bits->GV->getVCallVisibility() == + GlobalObject::VCallVisibilityPublic) return false; Function *Fn = nullptr; @@ -1105,6 +1125,12 @@ bool DevirtModule::tryFindVirtualCallTargets( if (Fn->getName() == "__cxa_pure_virtual") continue; + // In most cases, empty functions will be overridden by the + // implementation of the derived class, so we can skip them. + if (ClDevirtualizeSpeculatively && Fn->getReturnType()->isVoidTy() && + Fn->getInstructionCount() <= 1) + continue; + // We can disregard unreachable functions as possible call targets, as // unreachable functions shouldn't be called. if (mustBeUnreachableFunction(Fn, ExportSummary)) @@ -1223,10 +1249,12 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, CallTrap->setDebugLoc(CB.getDebugLoc()); } - // If fallback checking is enabled, add support to compare the virtual - // function pointer to the devirtualized target. In case of a mismatch, - // fall back to indirect call. - if (DevirtCheckMode == WPDCheckMode::Fallback) { + // If fallback checking or speculative devirtualization is enabled, + // add support to compare the virtual function pointer to the + // devirtualized target. In case of a mismatch, fall back to the + // indirect call. + if (DevirtCheckMode == WPDCheckMode::Fallback || + ClDevirtualizeSpeculatively) { MDNode *Weights = MDBuilder(M.getContext()).createLikelyBranchWeights(); // Version the indirect call site.
If the called value is equal to the // given callee, 'NewInst' will be executed, otherwise the original call @@ -2057,15 +2085,15 @@ void DevirtModule::scanTypeTestUsers( Function *TypeTestFunc, DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) { // Find all virtual calls via a virtual table pointer %p under an assumption - // of the form llvm.assume(llvm.type.test(%p, %md)). This indicates that %p - // points to a member of the type identifier %md. Group calls by (type ID, - // offset) pair (effectively the identity of the virtual function) and store - // to CallSlots. + // of the form llvm.assume(llvm.type.test(%p, %md)) or + // llvm.assume(llvm.public.type.test(%p, %md)). + // This indicates that %p points to a member of the type identifier %md. + // Group calls by (type ID, offset) pair (effectively the identity of the + // virtual function) and store to CallSlots. for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) { auto *CI = dyn_cast<CallInst>(U.getUser()); if (!CI) continue; - // Search for virtual calls based on %p and add them to DevirtCalls. SmallVector<DevirtCallSite, 1> DevirtCalls; SmallVector<CallInst *, 1> Assumes; @@ -2348,6 +2376,12 @@ bool DevirtModule::run() { (ImportSummary && ImportSummary->partiallySplitLTOUnits())) return false; + Function *PublicTypeTestFunc = nullptr; + // If we are in speculative devirtualization mode, we can work on the public + // type test intrinsics. + if (ClDevirtualizeSpeculatively) + PublicTypeTestFunc = + Intrinsic::getDeclarationIfExists(&M, Intrinsic::public_type_test); Function *TypeTestFunc = Intrinsic::getDeclarationIfExists(&M, Intrinsic::type_test); Function *TypeCheckedLoadFunc = @@ -2361,8 +2395,9 @@ bool DevirtModule::run() { // module, this pass has nothing to do. But if we are exporting, we also need // to handle any users that appear only in the function summaries. if (!ExportSummary && - (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc || - AssumeFunc->use_empty()) && + (((!PublicTypeTestFunc || PublicTypeTestFunc->use_empty()) && + (!TypeTestFunc || TypeTestFunc->use_empty())) || + !AssumeFunc || AssumeFunc->use_empty()) && (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) && (!TypeCheckedLoadRelativeFunc || TypeCheckedLoadRelativeFunc->use_empty())) @@ -2373,6 +2408,9 @@ bool DevirtModule::run() { DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap; buildTypeIdentifierMap(Bits, TypeIdMap); + if (PublicTypeTestFunc && AssumeFunc) + scanTypeTestUsers(PublicTypeTestFunc, TypeIdMap); + if (TypeTestFunc && AssumeFunc) scanTypeTestUsers(TypeTestFunc, TypeIdMap); @@ -2472,8 +2510,12 @@ bool DevirtModule::run() { .WPDRes[S.first.ByteOffset]; if (tryFindVirtualCallTargets(TargetsForSlot, TypeMemberInfos, S.first.ByteOffset, ExportSummary)) { - - if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { + bool SingleImplDevirt = + trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res); + // Outside of speculative devirtualization mode, try to apply virtual + // constant propagation or branch funneling. + // TODO: This should eventually be enabled for non-public type tests.
+ if (!SingleImplDevirt && !ClDevirtualizeSpeculatively) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 73ec451..9bee523 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2760,21 +2760,34 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". Value *LHSOp, *RHSOp; - if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && - match(Op1, m_PtrToInt(m_Value(RHSOp)))) + if (match(Op0, m_PtrToIntOrAddr(m_Value(LHSOp))) && + match(Op1, m_PtrToIntOrAddr(m_Value(RHSOp)))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(), I.hasNoUnsignedWrap())) return replaceInstUsesWith(I, Res); // trunc(p)-trunc(q) -> trunc(p-q) - if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && - match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) + if (match(Op0, m_Trunc(m_PtrToIntOrAddr(m_Value(LHSOp)))) && + match(Op1, m_Trunc(m_PtrToIntOrAddr(m_Value(RHSOp))))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType(), /* IsNUW */ false)) return replaceInstUsesWith(I, Res); - if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) && - match(Op1, m_ZExtOrSelf(m_PtrToInt(m_Value(RHSOp))))) { + auto MatchSubOfZExtOfPtrToIntOrAddr = [&]() { + if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) && + match(Op1, m_ZExt(m_PtrToIntSameSize(DL, m_Value(RHSOp))))) + return true; + if (match(Op0, m_ZExt(m_PtrToAddr(m_Value(LHSOp)))) && + match(Op1, m_ZExt(m_PtrToAddr(m_Value(RHSOp))))) + return true; + // Special case for a non-canonical ptrtoint in a constant expression, + // where the zext has been folded into the ptrtoint. + if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) && + match(Op1, m_PtrToInt(m_Value(RHSOp)))) + return true; + return false; + }; + if (MatchSubOfZExtOfPtrToIntOrAddr()) { if (auto *GEP = dyn_cast<GEPOperator>(LHSOp)) { if (GEP->getPointerOperand() == RHSOp) { if (GEP->hasNoUnsignedWrap() || GEP->hasNoUnsignedSignedWrap()) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index dab200d..669d4f0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4003,18 +4003,29 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // Try to fold intrinsic into select/phi operands. This is legal if: // * The intrinsic is speculatable. - // * The select condition is not a vector, or the intrinsic does not - // perform cross-lane operations. - if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) && - isNotCrossLaneOperation(II)) + // * The operand is one of the following: + // - a phi. + // - a select with a scalar condition. + // - a select with a vector condition, if II is not a cross-lane operation.
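The select fold above relies on a simple equivalence for speculatable operations: applying the operation to both arms of a select and then selecting gives the same result as selecting first. A minimal, self-contained C++ sketch of that equivalence, with a umax-like helper as an assumed stand-in for a speculatable intrinsic (not code from the patch):

#include <algorithm>
#include <cassert>

// Hypothetical stand-in for a speculatable intrinsic such as umax.
static unsigned umax(unsigned a, unsigned b) { return std::max(a, b); }

int main() {
  unsigned a = 3, b = 7, x = 5;
  for (bool c : {false, true}) {
    unsigned original = umax(c ? a : b, x);        // intrinsic of a select
    unsigned folded = c ? umax(a, x) : umax(b, x); // select of two intrinsics
    assert(original == folded); // valid because umax has no side effects
  }
}

For a vector condition the select acts per lane, so the same rewrite is only safe when the intrinsic is not a cross-lane operation (e.g. not a reduction), which is what the isNotCrossLaneOperation check guards.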
+ if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) { for (Value *Op : II->args()) { - if (auto *Sel = dyn_cast<SelectInst>(Op)) - if (Instruction *R = FoldOpIntoSelect(*II, Sel)) + if (auto *Sel = dyn_cast<SelectInst>(Op)) { + bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy(); + if (IsVectorCond && !isNotCrossLaneOperation(II)) + continue; + // Don't replace a scalar select with a more expensive vector select if + // we can't simplify both arms of the select. + bool SimplifyBothArms = + !Op->getType()->isVectorTy() && II->getType()->isVectorTy(); + if (Instruction *R = FoldOpIntoSelect( + *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms)) return R; + } if (auto *Phi = dyn_cast<PHINode>(Op)) if (Instruction *R = foldOpIntoPhi(*II, Phi)) return R; } + } if (Instruction *Shuf = foldShuffledIntrinsicOperands(II)) return Shuf; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 943c223..ede73f8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -664,7 +664,8 @@ public: /// This also works for Cast instructions, which obviously do not have a /// second operand. Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, - bool FoldWithMultiUse = false); + bool FoldWithMultiUse = false, + bool SimplifyBothArms = false); /// This is a convenience wrapper function for the above two functions. Instruction *foldBinOpIntoSelectOrPhi(BinaryOperator &I); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 3f11cae..67e2aae 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1777,7 +1777,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, } Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, - bool FoldWithMultiUse) { + bool FoldWithMultiUse, + bool SimplifyBothArms) { // Don't modify shared select instructions unless set FoldWithMultiUse if (!SI->hasOneUse() && !FoldWithMultiUse) return nullptr; @@ -1821,6 +1822,9 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (!NewTV && !NewFV) return nullptr; + if (SimplifyBothArms && !(NewTV && NewFV)) + return nullptr; + // Create an instruction for the arm that did not fold. if (!NewTV) NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this); diff --git a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp index 29968b8..8181e4e 100644 --- a/llvm/lib/Transforms/Instrumentation/AllocToken.cpp +++ b/llvm/lib/Transforms/Instrumentation/AllocToken.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" +#include "llvm/Support/AllocToken.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -54,47 +55,14 @@ #include <variant> using namespace llvm; +using TokenMode = AllocTokenMode; #define DEBUG_TYPE "alloc-token" namespace { -//===--- Constants --------------------------------------------------------===// - -enum class TokenMode : unsigned { - /// Incrementally increasing token ID. - Increment = 0, - - /// Simple mode that returns a statically-assigned random token ID. - Random = 1, - - /// Token ID based on allocated type hash. 
- TypeHash = 2, - - /// Token ID based on allocated type hash, where the top half ID-space is - /// reserved for types that contain pointers and the bottom half for types - /// that do not contain pointers. - TypeHashPointerSplit = 3, -}; - //===--- Command-line options ---------------------------------------------===// -cl::opt<TokenMode> ClMode( - "alloc-token-mode", cl::Hidden, cl::desc("Token assignment mode"), - cl::init(TokenMode::TypeHashPointerSplit), - cl::values( - clEnumValN(TokenMode::Increment, "increment", - "Incrementally increasing token ID"), - clEnumValN(TokenMode::Random, "random", - "Statically-assigned random token ID"), - clEnumValN(TokenMode::TypeHash, "typehash", - "Token ID based on allocated type hash"), - clEnumValN( - TokenMode::TypeHashPointerSplit, "typehashpointersplit", - "Token ID based on allocated type hash, where the top half " - "ID-space is reserved for types that contain pointers and the " - "bottom half for types that do not contain pointers. "))); - cl::opt<std::string> ClFuncPrefix("alloc-token-prefix", cl::desc("The allocation function prefix"), cl::Hidden, cl::init("__alloc_token_")); @@ -217,22 +185,19 @@ public: using ModeBase::ModeBase; uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { - const auto [N, H] = getHash(CB, ORE); - return N ? boundedToken(H) : H; - } -protected: - std::pair<MDNode *, uint64_t> getHash(const CallBase &CB, - OptimizationRemarkEmitter &ORE) { if (MDNode *N = getAllocTokenMetadata(CB)) { MDString *S = cast<MDString>(N->getOperand(0)); - return {N, getStableSipHash(S->getString())}; + AllocTokenMetadata Metadata{S->getString(), containsPointer(N)}; + if (auto Token = getAllocToken(TokenMode::TypeHash, Metadata, MaxTokens)) + return *Token; } // Fallback. remarkNoMetadata(CB, ORE); - return {nullptr, ClFallbackToken}; + return ClFallbackToken; } +protected: /// Remark that there was no precise type information. static void remarkNoMetadata(const CallBase &CB, OptimizationRemarkEmitter &ORE) { @@ -253,20 +218,18 @@ public: using TypeHashMode::TypeHashMode; uint64_t operator()(const CallBase &CB, OptimizationRemarkEmitter &ORE) { - if (MaxTokens == 1) - return 0; - const uint64_t HalfTokens = MaxTokens / 2; - const auto [N, H] = getHash(CB, ORE); - if (!N) { - // Pick the fallback token (ClFallbackToken), which by default is 0, - // meaning it'll fall into the pointer-less bucket. Override by setting - // -alloc-token-fallback if that is the wrong choice. - return H; + if (MDNode *N = getAllocTokenMetadata(CB)) { + MDString *S = cast<MDString>(N->getOperand(0)); + AllocTokenMetadata Metadata{S->getString(), containsPointer(N)}; + if (auto Token = getAllocToken(TokenMode::TypeHashPointerSplit, Metadata, + MaxTokens)) + return *Token; } - uint64_t Hash = H % HalfTokens; // base hash - if (containsPointer(N)) - Hash += HalfTokens; - return Hash; + // Pick the fallback token (ClFallbackToken), which by default is 0, meaning + // it'll fall into the pointer-less bucket. Override by setting + // -alloc-token-fallback if that is the wrong choice. 
+ remarkNoMetadata(CB, ORE); + return ClFallbackToken; } }; @@ -286,7 +249,7 @@ public: : Options(transformOptionsFromCl(std::move(Opts))), Mod(M), FAM(MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()), Mode(IncrementMode(*IntPtrTy, *Options.MaxTokens)) { - switch (ClMode.getValue()) { + switch (Options.Mode) { case TokenMode::Increment: break; case TokenMode::Random: diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp index d18c0d0..80e77e09 100644 --- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp @@ -2020,7 +2020,6 @@ static void moveFastMathFlags(Function &F, F.removeFnAttr(attr); \ FMF.set##setter(); \ } - MOVE_FLAG("unsafe-fp-math", Fast) MOVE_FLAG("no-infs-fp-math", NoInfs) MOVE_FLAG("no-nans-fp-math", NoNaNs) MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 8714741a..9829d4d 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -1793,3 +1793,13 @@ bool llvm::hasOnlySimpleTerminator(const Function &F) { } return true; } + +Printable llvm::printBasicBlock(const BasicBlock *BB) { + return Printable([BB](raw_ostream &OS) { + if (!BB) { + OS << "<nullptr>"; + return; + } + BB->printAsOperand(OS); + }); +} diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 978d5a2..371d9e6 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -260,9 +260,16 @@ bool PredicateInfoBuilder::stackIsInScope(const ValueDFSStack &Stack, // next to the defs they must go with so that we can know it's time to pop // the stack when we hit the end of the phi uses for a given def. const ValueDFS &Top = *Stack.back().V; - if (Top.LocalNum == LN_Last && Top.PInfo) { - if (!VDUse.U) - return false; + assert(Top.PInfo && "RenameStack should only contain predicate infos (defs)"); + if (Top.LocalNum == LN_Last) { + if (!VDUse.U) { + assert(VDUse.PInfo && "A non-use VDUse should have a predicate info"); + // We should reserve adjacent LN_Last defs for the same phi use. + return VDUse.LocalNum == LN_Last && + // If the two phi defs have the same edge, they must be designated + // for the same succ BB. 
+ getBlockEdge(Top.PInfo) == getBlockEdge(VDUse.PInfo); + } auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser()); if (!PHI) return false; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index adf27be..3356516 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7231,6 +7231,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( return DenseMap<const SCEV *, Value *>(); } + VPlanTransforms::narrowInterleaveGroups( + BestVPlan, BestVF, + TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)); VPlanTransforms::removeDeadRecipes(BestVPlan); VPlanTransforms::convertToConcreteRecipes(BestVPlan); @@ -8199,10 +8202,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, if (CM.foldTailWithEVL()) VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength, *Plan, CM.getMaxSafeElements()); - - if (auto P = VPlanTransforms::narrowInterleaveGroups(*Plan, TTI)) - VPlans.push_back(std::move(P)); - assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); VPlans.push_back(std::move(Plan)); } @@ -9860,6 +9859,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Get user vectorization factor and interleave count. ElementCount UserVF = Hints.getWidth(); unsigned UserIC = Hints.getInterleave(); + if (UserIC > 1 && !LVL.isSafeForAnyVectorWidth()) + UserIC = 1; // Plan how to best vectorize. LVP.plan(UserVF, UserIC); @@ -9924,7 +9925,15 @@ bool LoopVectorizePass::processLoop(Loop *L) { VectorizeLoop = false; } - if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) { + if (UserIC == 1 && Hints.getInterleave() > 1) { + assert(!LVL.isSafeForAnyVectorWidth() && + "UserIC should only be ignored due to unsafe dependencies"); + LLVM_DEBUG(dbgs() << "LV: Ignoring user-specified interleave count.\n"); + IntDiagMsg = {"InterleavingUnsafe", + "Ignoring user-specified interleave count due to possibly " + "unsafe dependencies in the loop."}; + InterleaveLoop = false; + } else if (!LVP.hasPlanWithVF(VF.Width) && UserIC > 1) { // Tell the user interleaving was avoided up-front, despite being explicitly // requested. LLVM_DEBUG(dbgs() << "LV: Ignoring UserIC, because vectorization and " diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index c95c887..428a8f4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1191,7 +1191,6 @@ VPlan *VPlan::duplicate() { } Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount; Old2NewVPValues[&VF] = &NewPlan->VF; - Old2NewVPValues[&UF] = &NewPlan->UF; Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF; if (BackedgeTakenCount) { NewPlan->BackedgeTakenCount = new VPValue(); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 167ba55..06bea2f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4152,9 +4152,6 @@ class VPlan { /// Represents the vectorization factor of the loop. VPValue VF; - /// Represents the symbolic unroll factor of the loop. - VPValue UF; - /// Represents the loop-invariant VF * UF of the vector loop region. VPValue VFxUF; @@ -4308,9 +4305,6 @@ public: VPValue &getVF() { return VF; }; const VPValue &getVF() const { return VF; }; - /// Returns the symbolic UF of the vector loop region. - VPValue &getSymbolicUF() { return UF; }; - /// Returns VF * UF of the vector loop region. 
VPValue &getVFxUF() { return VFxUF; } @@ -4320,12 +4314,6 @@ public: void addVF(ElementCount VF) { VFs.insert(VF); } - /// Remove \p VF from the plan. - void removeVF(ElementCount VF) { - assert(hasVF(VF) && "tried to remove VF not present in plan"); - VFs.remove(VF); - } - void setVF(ElementCount VF) { assert(hasVF(VF) && "Cannot set VF not already in plan"); VFs.clear(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ff25ef5..f5a3af4 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -3956,9 +3956,6 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, // used. // TODO: Assert that they aren't used. - VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF())); - Plan.getSymbolicUF().replaceAllUsesWith(UF); - // If there are no users of the runtime VF, compute VFxUF by constant folding // the multiplication of VF and UF. if (VF.getNumUsers() == 0) { @@ -3978,6 +3975,7 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, } VF.replaceAllUsesWith(RuntimeVF); + VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF())); VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF}); VFxUF.replaceAllUsesWith(MulByUF); } @@ -4045,14 +4043,14 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx, return false; } -/// Returns VF from \p VFs if \p IR is a full interleave group with factor and -/// number of members both equal to VF. The interleave group must also access -/// the full vector width. -static std::optional<ElementCount> isConsecutiveInterleaveGroup( - VPInterleaveRecipe *InterleaveR, ArrayRef<ElementCount> VFs, - VPTypeAnalysis &TypeInfo, const TargetTransformInfo &TTI) { - if (!InterleaveR) - return std::nullopt; +/// Returns true if \p IR is a full interleave group with factor and number of +/// members both equal to \p VF. The interleave group must also access the full +/// vector width \p VectorRegWidth. +static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR, + unsigned VF, VPTypeAnalysis &TypeInfo, + unsigned VectorRegWidth) { + if (!InterleaveR || InterleaveR->getMask()) + return false; Type *GroupElementTy = nullptr; if (InterleaveR->getStoredValues().empty()) { @@ -4061,7 +4059,7 @@ static std::optional<ElementCount> isConsecutiveInterleaveGroup( [&TypeInfo, GroupElementTy](VPValue *Op) { return TypeInfo.inferScalarType(Op) == GroupElementTy; })) - return std::nullopt; + return false; } else { GroupElementTy = TypeInfo.inferScalarType(InterleaveR->getStoredValues()[0]); @@ -4069,27 +4067,13 @@ static std::optional<ElementCount> isConsecutiveInterleaveGroup( [&TypeInfo, GroupElementTy](VPValue *Op) { return TypeInfo.inferScalarType(Op) == GroupElementTy; })) - return std::nullopt; + return false; } - auto GetVectorWidthForVF = [&TTI](ElementCount VF) { - TypeSize Size = TTI.getRegisterBitWidth( - VF.isFixed() ? 
TargetTransformInfo::RGK_FixedWidthVector - : TargetTransformInfo::RGK_ScalableVector); - assert(Size.isScalable() == VF.isScalable() && - "if Size is scalable, VF must to and vice versa"); - return Size.getKnownMinValue(); - }; - - for (ElementCount VF : VFs) { - unsigned MinVal = VF.getKnownMinValue(); - unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * MinVal; - auto IG = InterleaveR->getInterleaveGroup(); - if (IG->getFactor() == MinVal && IG->getNumMembers() == MinVal && - GroupSize == GetVectorWidthForVF(VF)) - return {VF}; - } - return std::nullopt; + unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * VF; + auto IG = InterleaveR->getInterleaveGroup(); + return IG->getFactor() == VF && IG->getNumMembers() == VF && + GroupSize == VectorRegWidth; } /// Returns true if \p VPValue is a narrow VPValue. @@ -4100,18 +4084,16 @@ static bool isAlreadyNarrow(VPValue *VPV) { return RepR && RepR->isSingleScalar(); } -std::unique_ptr<VPlan> -VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, - const TargetTransformInfo &TTI) { - using namespace llvm::VPlanPatternMatch; +void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, + unsigned VectorRegWidth) { VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion(); - if (!VectorLoop) - return nullptr; + return; VPTypeAnalysis TypeInfo(Plan); + + unsigned VFMinVal = VF.getKnownMinValue(); SmallVector<VPInterleaveRecipe *> StoreGroups; - std::optional<ElementCount> VFToOptimize; for (auto &R : *VectorLoop->getEntryBasicBlock()) { if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount())) continue; @@ -4125,33 +4107,30 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, // * recipes writing to memory except interleave groups // Only support plans with a canonical induction phi. if (R.isPhi()) - return nullptr; + return; auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R); if (R.mayWriteToMemory() && !InterleaveR) - return nullptr; + return; + + // Do not narrow interleave groups if there are VectorPointer recipes and + // the plan was unrolled. The recipe implicitly uses VF from + // VPTransformState. + // TODO: Remove restriction once the VF for the VectorPointer offset is + // modeled explicitly as operand. + if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1) + return; // All other ops are allowed, but we reject uses that cannot be converted // when checking all allowed consumers (store interleave groups) below. if (!InterleaveR) continue; - // Try to find a single VF, where all interleave groups are consecutive and - // saturate the full vector width. If we already have a candidate VF, check - // if it is applicable for the current InterleaveR, otherwise look for a - // suitable VF across the Plans VFs. - // - if (VFToOptimize) { - if (!isConsecutiveInterleaveGroup(InterleaveR, {*VFToOptimize}, TypeInfo, - TTI)) - return nullptr; - } else { - if (auto VF = isConsecutiveInterleaveGroup( - InterleaveR, to_vector(Plan.vectorFactors()), TypeInfo, TTI)) - VFToOptimize = *VF; - else - return nullptr; - } + // Bail out on non-consecutive interleave groups. + if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo, + VectorRegWidth)) + return; + // Skip read interleave groups. 
if (InterleaveR->getStoredValues().empty()) continue; @@ -4185,34 +4164,24 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>( InterleaveR->getStoredValues()[0]->getDefiningRecipe()); if (!WideMember0) - return nullptr; + return; for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) { auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe()); if (!R || R->getOpcode() != WideMember0->getOpcode() || R->getNumOperands() > 2) - return nullptr; + return; if (any_of(enumerate(R->operands()), [WideMember0, Idx = I](const auto &P) { const auto &[OpIdx, OpV] = P; return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx); })) - return nullptr; + return; } StoreGroups.push_back(InterleaveR); } if (StoreGroups.empty()) - return nullptr; - - // All interleave groups in Plan can be narrowed for VFToOptimize. Split the - // original Plan into 2: a) a new clone which contains all VFs of Plan, except - // VFToOptimize, and b) the original Plan with VFToOptimize as single VF. - std::unique_ptr<VPlan> NewPlan; - if (size(Plan.vectorFactors()) != 1) { - NewPlan = std::unique_ptr<VPlan>(Plan.duplicate()); - Plan.setVF(*VFToOptimize); - NewPlan->removeVF(*VFToOptimize); - } + return; // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe. SmallPtrSet<VPValue *, 4> NarrowedOps; @@ -4283,8 +4252,9 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue()); VPBuilder PHBuilder(Plan.getVectorPreheader()); - VPValue *UF = &Plan.getSymbolicUF(); - if (VFToOptimize->isScalable()) { + VPValue *UF = Plan.getOrAddLiveIn( + ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF())); + if (VF.isScalable()) { VPValue *VScale = PHBuilder.createElementCount( CanIV->getScalarType(), ElementCount::getScalable(1)); VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF}); @@ -4296,10 +4266,6 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1))); } removeDeadRecipes(Plan); - assert(none_of(*VectorLoop->getEntryBasicBlock(), - IsaPred<VPVectorPointerRecipe>) && - "All VPVectorPointerRecipes should have been removed"); - return NewPlan; } /// Add branch weight metadata, if the \p Plan's middle block is terminated by a diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index ca8d956..b28559b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -341,20 +341,14 @@ struct VPlanTransforms { static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan, ScalarEvolution &SE); - /// Try to find a single VF among \p Plan's VFs for which all interleave - /// groups (with known minimum VF elements) can be replaced by wide loads and - /// stores processing VF elements, if all transformed interleave groups access - /// the full vector width (checked via the maximum vector register width). If - /// the transformation can be applied, the original \p Plan will be split in - /// 2: - /// 1. The original Plan with the single VF containing the optimized recipes - /// using wide loads instead of interleave groups. - /// 2. A new clone which contains all VFs of Plan except the optimized VF. - /// - /// This effectively is a very simple form of loop-aware SLP, where we use - /// interleave groups to identify candidates. 
- static std::unique_ptr<VPlan> - narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI); + /// Try to convert a plan with interleave groups with VF elements to a plan + /// with the interleave groups replaced by wide loads and stores processing VF + /// elements, if all transformed interleave groups access the full vector + /// width (checked via \p VectorRegWidth). This effectively is a very simple + /// form of loop-aware SLP, where we use interleave groups to identify + /// candidates. + static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF, + unsigned VectorRegWidth); /// Predicate and linearize the control-flow in the only loop region of /// \p Plan. If \p FoldTail is true, create a mask guarding the loop diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 0678bc90..83e3fca 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -41,10 +41,10 @@ class VPRecipeBase; class VPInterleaveBase; class VPPhiAccessors; -// This is the base class of the VPlan Def/Use graph, used for modeling the data -// flow into, within and out of the VPlan. VPValues can stand for live-ins -// coming from the input IR and instructions which VPlan will generate if -// executed. +/// This is the base class of the VPlan Def/Use graph, used for modeling the +/// data flow into, within and out of the VPlan. VPValues can stand for live-ins +/// coming from the input IR and instructions which VPlan will generate if +/// executed. class LLVM_ABI_FOR_TEST VPValue { friend class VPDef; friend struct VPDoubleValueDef; @@ -57,7 +57,7 @@ class LLVM_ABI_FOR_TEST VPValue { SmallVector<VPUser *, 1> Users; protected: - // Hold the underlying Value, if any, attached to this VPValue. + /// Hold the underlying Value, if any, attached to this VPValue. Value *UnderlyingVal; /// Pointer to the VPDef that defines this VPValue. If it is nullptr, the
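The gate used by the narrowInterleaveGroups change above can be summarized numerically: an interleave group qualifies only if its factor and member count both equal VF and one narrowed wide access of VF elements exactly fills a vector register. A minimal C++ sketch of that arithmetic with assumed example values (it mirrors the shape of the check in isConsecutiveInterleaveGroup, but is not code from the patch):

#include <cassert>

// GroupSize = element bits * VF must match the register width, and the
// group's factor and member count must both equal VF.
static bool saturatesRegister(unsigned ElemBits, unsigned VF, unsigned Factor,
                              unsigned NumMembers, unsigned VectorRegWidth) {
  unsigned GroupSize = ElemBits * VF;
  return Factor == VF && NumMembers == VF && GroupSize == VectorRegWidth;
}

int main() {
  // A 4-member group of i32 at VF=4 fills a 128-bit register: can be narrowed.
  assert(saturatesRegister(32, 4, 4, 4, 128));
  // At VF=2 the same group neither matches the factor nor fills the register,
  // so it is left as an interleave group.
  assert(!saturatesRegister(32, 2, 4, 4, 128));
}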