Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp    5
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp                  2
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp        43
-rw-r--r--  llvm/lib/Transforms/Utils/CloneFunction.cpp                 67
-rw-r--r--  llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp               106
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp              6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp                      6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h                        1
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp         12
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h           1
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h            8
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp               4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp           35
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp                3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp              8
15 files changed, 213 insertions, 94 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 82ac903..3f11cae 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1690,6 +1690,11 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
 // 2) (fp_binop ({s|u}itofp x), FpC)
 //    -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
 Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
+  // Don't perform the fold on vectors, as the integer operation may be much
+  // more expensive than the float operation in that case.
+  if (BO.getType()->isVectorTy())
+    return nullptr;
+
   std::array<Value *, 2> IntOps = {nullptr, nullptr};
   Constant *Op1FpC = nullptr;
   // Check for:
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 66a2c76..09db464 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -2626,7 +2626,7 @@ void ObjCARCOpt::OptimizeAutoreleasePools(Function &F) {
     case ARCInstKind::Call:
       if (!MayAutorelease(cast<CallBase>(Inst)))
         break;
-      LLVM_FALLTHROUGH;
+      [[fallthrough]];
     case ARCInstKind::Autorelease:
     case ARCInstKind::AutoreleaseRV:
     case ARCInstKind::FusedRetainAutorelease:
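
An illustration of the pattern the InstCombine guard affects (a standalone sketch with made-up function names, not part of this commit). The fold rewrites an FP binop of int-to-fp casts into an integer binop followed by a single cast; the new early-out keeps vector code on the FP side:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Builds: ret float (fadd (sitofp i32 %a), (sitofp i32 %b)).
// On scalars, InstCombine may turn this into sitofp (add i32 %a, %b) when it
// can prove the rewrite is lossless; after this patch the same pattern with
// <N x float> operands is deliberately left as the FP operation, since a
// vector integer add can be costlier than the vector FP add it replaces.
void buildScalarExample(Module &M) {
  LLVMContext &Ctx = M.getContext();
  IRBuilder<> B(Ctx);
  auto *FTy = FunctionType::get(B.getFloatTy(),
                                {B.getInt32Ty(), B.getInt32Ty()}, false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "fold", &M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
  Value *L = B.CreateSIToFP(F->getArg(0), B.getFloatTy());
  Value *R = B.CreateSIToFP(F->getArg(1), B.getFloatTy());
  B.CreateRet(B.CreateFAdd(L, R));
}
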
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 56e0569..7cae94eb 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -1295,6 +1295,24 @@ public:
     return commonAlignment(InitialAlign, ElementSizeInBits / 8);
   }
 
+  IntegerType *getIndexType(Value *Ptr) const {
+    return cast<IntegerType>(DL.getIndexType(Ptr->getType()));
+  }
+
+  Value *getIndex(Value *Ptr, uint64_t V) const {
+    return ConstantInt::get(getIndexType(Ptr), V);
+  }
+
+  Value *castToIndexType(Value *Ptr, Value *V, IRBuilder<> &Builder) const {
+    assert(isa<IntegerType>(V->getType()) &&
+           "Attempted to cast non-integral type to integer index");
+    // In case the data layout's index type differs in width from the type of
+    // the value we're given, truncate or zero extend to the appropriate width.
+    // We zero extend here as indices are unsigned.
+    return Builder.CreateZExtOrTrunc(V, getIndexType(Ptr),
+                                     V->getName() + ".cast");
+  }
+
   /// Load a matrix with \p Shape starting at \p Ptr and using \p Stride between
   /// vectors.
   MatrixTy loadMatrix(Type *Ty, Value *Ptr, MaybeAlign MAlign, Value *Stride,
@@ -1304,6 +1322,7 @@ public:
     Type *VecTy = FixedVectorType::get(EltTy, Shape.getStride());
     Value *EltPtr = Ptr;
     MatrixTy Result;
+    Stride = castToIndexType(Ptr, Stride, Builder);
     for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) {
       Value *GEP = computeVectorAddr(
           EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I),
@@ -1325,14 +1344,14 @@ public:
                       ShapeInfo ResultShape, Type *EltTy,
                       IRBuilder<> &Builder) {
     Value *Offset = Builder.CreateAdd(
-        Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);
+        Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I);
 
     Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
     auto *TileTy = FixedVectorType::get(EltTy, ResultShape.NumRows *
                                                    ResultShape.NumColumns);
 
     return loadMatrix(TileTy, TileStart, Align,
-                      Builder.getInt64(MatrixShape.getStride()), IsVolatile,
+                      getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile,
                       ResultShape, Builder);
   }
 
@@ -1363,14 +1382,15 @@ public:
                    MaybeAlign MAlign, bool IsVolatile, ShapeInfo MatrixShape,
                    Value *I, Value *J, Type *EltTy, IRBuilder<> &Builder) {
     Value *Offset = Builder.CreateAdd(
-        Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);
+        Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I);
 
     Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
     auto *TileTy = FixedVectorType::get(EltTy, StoreVal.getNumRows() *
                                                    StoreVal.getNumColumns());
 
     storeMatrix(TileTy, StoreVal, TileStart, MAlign,
-                Builder.getInt64(MatrixShape.getStride()), IsVolatile, Builder);
+                getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile,
+                Builder);
   }
 
   /// Store matrix \p StoreVal starting at \p Ptr and using \p Stride between
@@ -1380,6 +1400,7 @@ public:
                       IRBuilder<> &Builder) {
     auto *VType = cast<FixedVectorType>(Ty);
     Value *EltPtr = Ptr;
+    Stride = castToIndexType(Ptr, Stride, Builder);
     for (auto Vec : enumerate(StoreVal.vectors())) {
       Value *GEP = computeVectorAddr(
           EltPtr,
@@ -2011,18 +2032,17 @@ public:
         const unsigned TileM = std::min(M - K, unsigned(TileSize));
         MatrixTy A =
             loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
-                       LShape, Builder.getInt64(I), Builder.getInt64(K),
+                       LShape, getIndex(APtr, I), getIndex(APtr, K),
                        {TileR, TileM}, EltType, Builder);
         MatrixTy B =
             loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
-                       RShape, Builder.getInt64(K), Builder.getInt64(J),
+                       RShape, getIndex(BPtr, K), getIndex(BPtr, J),
                        {TileM, TileC}, EltType, Builder);
         emitMatrixMultiply(Res, A, B, Builder, true, false,
                            getFastMathFlags(MatMul));
       }
       storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
-                  Builder.getInt64(I), Builder.getInt64(J), EltType,
-                  Builder);
+                  getIndex(CPtr, I), getIndex(CPtr, J), EltType, Builder);
     }
   }
 
@@ -2254,15 +2274,14 @@ public:
   /// Lower load instructions.
   MatrixTy VisitLoad(LoadInst *Inst, const ShapeInfo &SI, Value *Ptr,
                      IRBuilder<> &Builder) {
-    return LowerLoad(Inst, Ptr, Inst->getAlign(),
-                     Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI,
-                     Builder);
+    return LowerLoad(Inst, Ptr, Inst->getAlign(), getIndex(Ptr, SI.getStride()),
+                     Inst->isVolatile(), SI, Builder);
   }
 
   MatrixTy VisitStore(StoreInst *Inst, const ShapeInfo &SI, Value *StoredVal,
                       Value *Ptr, IRBuilder<> &Builder) {
     return LowerStore(Inst, StoredVal, Ptr, Inst->getAlign(),
-                      Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI,
+                      getIndex(Ptr, SI.getStride()), Inst->isVolatile(), SI,
                       Builder);
   }
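
The new getIndex/castToIndexType helpers exist because GEP indices should use the data layout's index type, which is not always i64 (32-bit targets, some GPU address spaces). A minimal sketch of the normalization, built on the same two APIs the patch uses (the helper name here is illustrative):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Normalize an integer stride to the GEP index type of Ptr's pointer type.
// Mirrors the new castToIndexType helper: zero extend (strides are treated
// as unsigned) or truncate, depending on the index width the target uses.
Value *castStrideToIndexType(Value *Ptr, Value *Stride, IRBuilder<> &B,
                             const DataLayout &DL) {
  Type *IdxTy = DL.getIndexType(Ptr->getType());
  return B.CreateZExtOrTrunc(Stride, IdxTy, Stride->getName() + ".cast");
}
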
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index b187208..3ce569f 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -44,7 +44,7 @@ using namespace llvm;
 STATISTIC(RemappedAtomMax, "Highest global NextAtomGroup (after mapping)");
 
 void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) {
-  auto CurGroup = DL->getAtomGroup();
+  uint64_t CurGroup = DL->getAtomGroup();
   if (!CurGroup)
     return;
 
@@ -62,21 +62,20 @@ void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) {
   RemappedAtomMax = std::max<uint64_t>(NewGroup, RemappedAtomMax);
 }
 
-namespace {
-void collectDebugInfoFromInstructions(const Function &F,
-                                      DebugInfoFinder &DIFinder) {
+static void collectDebugInfoFromInstructions(const Function &F,
+                                             DebugInfoFinder &DIFinder) {
   const Module *M = F.getParent();
-  if (M) {
-    // Inspect instructions to process e.g. DILexicalBlocks of inlined functions
-    for (const auto &I : instructions(F))
-      DIFinder.processInstruction(*M, I);
-  }
+  if (!M)
+    return;
+  // Inspect instructions to process e.g. DILexicalBlocks of inlined functions
+  for (const Instruction &I : instructions(F))
+    DIFinder.processInstruction(*M, I);
 }
 
 // Create a predicate that matches the metadata that should be identity mapped
 // during function cloning.
-MetadataPredicate createIdentityMDPredicate(const Function &F,
-                                            CloneFunctionChangeType Changes) {
+static MetadataPredicate
+createIdentityMDPredicate(const Function &F, CloneFunctionChangeType Changes) {
   if (Changes >= CloneFunctionChangeType::DifferentModule)
     return [](const Metadata *MD) { return false; };
 
@@ -107,7 +106,6 @@ MetadataPredicate createIdentityMDPredicate(const Function &F,
     return false;
   };
 }
-} // namespace
 
 /// See comments in Cloning.h.
 BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
@@ -213,10 +211,9 @@ void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc,
                                      const MetadataPredicate *IdentityMD) {
   SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
   OldFunc.getAllMetadata(MDs);
-  for (auto MD : MDs) {
-    NewFunc.addMetadata(MD.first,
-                        *MapMetadata(MD.second, VMap, RemapFlag, TypeMapper,
-                                     Materializer, IdentityMD));
+  for (const auto &[Kind, MD] : MDs) {
+    NewFunc.addMetadata(Kind, *MapMetadata(MD, VMap, RemapFlag, TypeMapper,
+                                           Materializer, IdentityMD));
   }
 }
 
@@ -235,7 +232,6 @@ void llvm::CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
   // appropriate. Note that we save BE this way in order to handle cloning of
   // recursive functions into themselves.
   for (const BasicBlock &BB : OldFunc) {
-
     // Create a new basic block and copy instructions into it!
     BasicBlock *CBB =
         CloneBasicBlock(&BB, VMap, NameSuffix, &NewFunc, CodeInfo);
@@ -321,7 +317,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 
   // Cloning is always a Module level operation, since Metadata needs to be
   // cloned.
-  const auto RemapFlag = RF_None;
+  const RemapFlags RemapFlag = RF_None;
   CloneFunctionMetadataInto(*NewFunc, *OldFunc, VMap, RemapFlag, TypeMapper,
                             Materializer, &IdentityMD);
 
@@ -346,8 +342,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   // visiting the metadata attached to global values, which would allow this
   // code to be deleted. Alternatively, perhaps give responsibility for this
   // update to CloneFunctionInto's callers.
-  auto *NewModule = NewFunc->getParent();
-  auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
+  Module *NewModule = NewFunc->getParent();
+  NamedMDNode *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
   // Avoid multiple insertions of the same DICompileUnit to NMD.
   SmallPtrSet<const void *, 8> Visited(llvm::from_range, NMD->operands());
 
@@ -355,7 +351,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
   // the function (e.g. as instructions' scope).
   DebugInfoFinder DIFinder;
   collectDebugInfoFromInstructions(*OldFunc, DIFinder);
-  for (auto *Unit : DIFinder.compile_units()) {
+  for (DICompileUnit *Unit : DIFinder.compile_units()) {
     MDNode *MappedUnit =
         MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer);
     if (Visited.insert(MappedUnit).second)
@@ -821,17 +817,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
         --PredCount[Pred];
 
       // Figure out how many entries to remove from each PHI.
-      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-        ++PredCount[PN->getIncomingBlock(i)];
+      for (BasicBlock *Pred : PN->blocks())
+        ++PredCount[Pred];
 
       // At this point, the excess predecessor entries are positive in the
      // map.  Loop over all of the PHIs and remove excess predecessor
       // entries.
       BasicBlock::iterator I = NewBB->begin();
       for (; (PN = dyn_cast<PHINode>(I)); ++I) {
-        for (const auto &PCI : PredCount) {
-          BasicBlock *Pred = PCI.first;
-          for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
+        for (const auto &[Pred, Count] : PredCount) {
+          for (unsigned _ : llvm::seq<unsigned>(Count))
             PN->removeIncomingValue(Pred, false);
         }
       }
@@ -866,8 +861,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
   // As phi-nodes have been now remapped, allow incremental simplification of
   // newly-cloned instructions.
   const DataLayout &DL = NewFunc->getDataLayout();
-  for (const auto &BB : *OldFunc) {
-    for (const auto &I : BB) {
+  for (const BasicBlock &BB : *OldFunc) {
+    for (const Instruction &I : BB) {
       auto *NewI = dyn_cast_or_null<Instruction>(VMap.lookup(&I));
       if (!NewI)
         continue;
@@ -997,8 +992,8 @@ void llvm::CloneAndPruneFunctionInto(
 void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks,
                                      ValueToValueMapTy &VMap) {
   // Rewrite the code to refer to itself.
-  for (auto *BB : Blocks) {
-    for (auto &Inst : *BB) {
+  for (BasicBlock *BB : Blocks) {
+    for (Instruction &Inst : *BB) {
       RemapDbgRecordRange(Inst.getModule(), Inst.getDbgRecordRange(), VMap,
                           RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
       RemapInstruction(&Inst, VMap,
@@ -1151,9 +1146,9 @@ void llvm::cloneNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
                               StringRef Ext, LLVMContext &Context) {
   MDBuilder MDB(Context);
 
-  for (auto *ScopeList : NoAliasDeclScopes) {
-    for (const auto &MDOperand : ScopeList->operands()) {
-      if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) {
+  for (MDNode *ScopeList : NoAliasDeclScopes) {
+    for (const MDOperand &MDOp : ScopeList->operands()) {
+      if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
         AliasScopeNode SNANode(MD);
 
         std::string Name;
@@ -1177,7 +1172,7 @@ void llvm::adaptNoAliasScopes(Instruction *I,
   auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * {
     bool NeedsReplacement = false;
     SmallVector<Metadata *, 8> NewScopeList;
-    for (const auto &MDOp : ScopeList->operands()) {
+    for (const MDOperand &MDOp : ScopeList->operands()) {
       if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
         if (auto *NewMD = ClonedScopes.lookup(MD)) {
           NewScopeList.push_back(NewMD);
@@ -1193,12 +1188,12 @@ void llvm::adaptNoAliasScopes(Instruction *I,
   };
 
   if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
-    if (auto *NewScopeList = CloneScopeList(Decl->getScopeList()))
+    if (MDNode *NewScopeList = CloneScopeList(Decl->getScopeList()))
       Decl->setScopeList(NewScopeList);
 
   auto replaceWhenNeeded = [&](unsigned MD_ID) {
     if (const MDNode *CSNoAlias = I->getMetadata(MD_ID))
-      if (auto *NewScopeList = CloneScopeList(CSNoAlias))
+      if (MDNode *NewScopeList = CloneScopeList(CSNoAlias))
         I->setMetadata(MD_ID, NewScopeList);
   };
   replaceWhenNeeded(LLVMContext::MD_noalias);
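
Two idioms recur in the CloneFunction cleanups: structured bindings over map entries and llvm::seq for counted loops. A tiny self-contained sketch (hypothetical values, not from the commit):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void idioms() {
  DenseMap<int, unsigned> PredCount = {{1, 2}, {7, 3}};

  // Structured bindings name the key/value directly instead of .first/.second.
  for (const auto &[Pred, Count] : PredCount)
    // llvm::seq yields 0, 1, ..., Count - 1, replacing a hand-rolled
    // countdown whose induction variable was never actually read.
    for (unsigned I : seq<unsigned>(Count))
      outs() << "pred " << Pred << ": removal #" << I << "\n";
}
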
diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
index d7bf791..ccb86eb 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -11,11 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/Value.h"
 
@@ -112,7 +112,7 @@ struct BBValueInfo {
 void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
                                     SmallVectorImpl<PHINode *> *InsertedPHIs) {
   DenseMap<BasicBlock *, BBValueInfo> BBInfos;
-  for (auto &R : Rewrites) {
+  for (RewriteInfo &R : Rewrites) {
     BBInfos.clear();
 
     // Compute locations for new phi-nodes.
@@ -145,7 +145,7 @@
       BBInfos[BB].LiveOutValue = V;
 
     // We've computed IDF, now insert new phi-nodes there.
-    for (auto *FrontierBB : IDFBlocks) {
+    for (BasicBlock *FrontierBB : IDFBlocks) {
       IRBuilder<> B(FrontierBB, FrontierBB->begin());
       PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name);
       BBInfos[FrontierBB].LiveInValue = PN;
@@ -156,7 +156,7 @@
     // IsLiveOut indicates whether we are computing live-out values (true) or
     // live-in values (false).
     auto ComputeValue = [&](BasicBlock *BB, bool IsLiveOut) -> Value * {
-      auto *BBInfo = &BBInfos[BB];
+      BBValueInfo *BBInfo = &BBInfos[BB];
 
       if (IsLiveOut && BBInfo->LiveOutValue)
         return BBInfo->LiveOutValue;
@@ -187,7 +187,7 @@
       if (!V)
         V = UndefValue::get(R.Ty);
 
-      for (auto *BBInfo : Stack)
+      for (BBValueInfo *BBInfo : Stack)
         // Loop above can insert new entries into the BBInfos map: assume the
         // map shouldn't grow due to [1] and BBInfo references are valid.
         BBInfo->LiveInValue = V;
@@ -196,7 +196,7 @@
     };
 
     // Fill in arguments of the inserted PHIs.
-    for (auto *BB : IDFBlocks) {
+    for (BasicBlock *BB : IDFBlocks) {
       auto *PHI = cast<PHINode>(&BB->front());
       for (BasicBlock *Pred : PredCache.get(BB))
         PHI->addIncoming(ComputeValue(Pred, /*IsLiveOut=*/true), Pred);
@@ -222,3 +222,97 @@
     }
   }
 }
+
+// Perform a single pass of simplification over the worklist of PHIs.
+// This should be called after RewriteAllUses() because simplifying PHIs
+// immediately after creation would require updating all references to those
+// PHIs in the BBValueInfo structures, which would necessitate additional
+// reference tracking overhead.
+static void simplifyPass(MutableArrayRef<PHINode *> Worklist,
+                         const DataLayout &DL) {
+  for (PHINode *&PHI : Worklist) {
+    if (Value *Simplified = simplifyInstruction(PHI, DL)) {
+      PHI->replaceAllUsesWith(Simplified);
+      PHI->eraseFromParent();
+      PHI = nullptr; // Mark as removed.
+    }
+  }
+}
+
+#ifndef NDEBUG // Should this be under EXPENSIVE_CHECKS?
+// New PHI nodes should not reference one another but they may reference
+// themselves or existing PHI nodes, and existing PHI nodes may reference new
+// PHI nodes.
+static bool
+PHIAreRefEachOther(const iterator_range<BasicBlock::phi_iterator> NewPHIs) {
+  SmallPtrSet<PHINode *, 8> NewPHISet;
+  for (PHINode &PN : NewPHIs)
+    NewPHISet.insert(&PN);
+  for (PHINode &PHI : NewPHIs) {
+    for (Value *V : PHI.incoming_values()) {
+      PHINode *IncPHI = dyn_cast<PHINode>(V);
+      if (IncPHI && IncPHI != &PHI && NewPHISet.contains(IncPHI))
+        return true;
+    }
+  }
+  return false;
+}
+#endif
+
+static bool replaceIfIdentical(PHINode &PHI, PHINode &ReplPHI) {
+  if (!PHI.isIdenticalToWhenDefined(&ReplPHI))
+    return false;
+  PHI.replaceAllUsesWith(&ReplPHI);
+  PHI.eraseFromParent();
+  return true;
+}
+
+bool EliminateNewDuplicatePHINodes(BasicBlock *BB,
+                                   BasicBlock::phi_iterator FirstExistingPN) {
+  auto NewPHIs = make_range(BB->phis().begin(), FirstExistingPN);
+  assert(!PHIAreRefEachOther(NewPHIs));
+
+  // Deduplicate new PHIs first to reduce the number of comparisons on the
+  // following new -> existing pass.
+  bool Changed = false;
+  for (auto I = BB->phis().begin(); I != FirstExistingPN; ++I) {
+    for (auto J = std::next(I); J != FirstExistingPN;) {
+      Changed |= replaceIfIdentical(*J++, *I);
+    }
+  }
+
+  // Iterate over existing PHIs and replace identical new PHIs.
+  for (PHINode &ExistingPHI : make_range(FirstExistingPN, BB->phis().end())) {
+    auto I = BB->phis().begin();
+    assert(I != FirstExistingPN); // Should be at least one new PHI.
+    do {
+      Changed |= replaceIfIdentical(*I++, ExistingPHI);
+    } while (I != FirstExistingPN);
+    if (BB->phis().begin() == FirstExistingPN)
+      return Changed;
+  }
+  return Changed;
+}
+
+static void deduplicatePass(ArrayRef<PHINode *> Worklist) {
+  SmallDenseMap<BasicBlock *, unsigned> BBs;
+  for (PHINode *PHI : Worklist) {
+    if (PHI)
+      ++BBs[PHI->getParent()];
+  }
+
+  for (auto [BB, NumNewPHIs] : BBs) {
+    auto FirstExistingPN = std::next(BB->phis().begin(), NumNewPHIs);
+    EliminateNewDuplicatePHINodes(BB, FirstExistingPN);
+  }
+}
+
+void SSAUpdaterBulk::RewriteAndOptimizeAllUses(DominatorTree &DT) {
+  SmallVector<PHINode *, 4> PHIs;
+  RewriteAllUses(&DT, &PHIs);
+  if (PHIs.empty())
+    return;
+
+  simplifyPass(PHIs, PHIs.front()->getParent()->getDataLayout());
+  deduplicatePass(PHIs);
+}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a6f4bec..f95d288 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10659,7 +10659,8 @@ class InstructionsCompatibilityAnalysis {
   static bool isSupportedOpcode(const unsigned Opcode) {
     return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
            Opcode == Instruction::Shl || Opcode == Instruction::SDiv ||
-           Opcode == Instruction::UDiv;
+           Opcode == Instruction::UDiv || Opcode == Instruction::And ||
+           Opcode == Instruction::Or || Opcode == Instruction::Xor;
   }
 
   /// Identifies the best candidate value, which represents main opcode
@@ -10984,6 +10985,9 @@ public:
     case Instruction::Shl:
     case Instruction::SDiv:
     case Instruction::UDiv:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor:
       VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
       break;
     default:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 1fea068..0101942 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -635,9 +635,9 @@ static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
   const VPRecipeBase *R = &VPBB->back();
   bool IsSwitch = isa<VPInstruction>(R) &&
                   cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
-  bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) ||
-                      match(R, m_BranchOnCond(m_VPValue())) ||
-                      match(R, m_BranchOnCount(m_VPValue(), m_VPValue()));
+  bool IsCondBranch =
+      isa<VPBranchOnMaskRecipe>(R) ||
+      match(R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount()));
   (void)IsCondBranch;
   (void)IsSwitch;
   if (VPBB->getNumSuccessors() == 2 ||
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fb696be..8ca3bed 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1064,6 +1064,7 @@ public:
     ResumeForEpilogue,
     /// Returns the value for vscale.
     VScale,
+    OpsEnd = VScale,
   };
 
   /// Returns true if this VPInstruction generates scalar values for all lanes.
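
For context, a sketch of how a client might drive the new RewriteAndOptimizeAllUses entry point; the setup (variable, definitions, uses) is assumed for illustration rather than taken from an in-tree caller:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"

using namespace llvm;

// Rewrites all Uses of one variable into SSA form, then lets the updater
// simplify and deduplicate the PHIs it had to insert.
void rewriteVar(DominatorTree &DT, Type *Ty,
                ArrayRef<std::pair<BasicBlock *, Value *>> Defs,
                ArrayRef<Use *> Uses) {
  SSAUpdaterBulk Updater;
  unsigned Var = Updater.AddVariable("x", Ty);
  for (const auto &[BB, V] : Defs)
    Updater.AddAvailableValue(Var, BB, V);
  for (Use *U : Uses)
    Updater.AddUse(Var, U);
  // Plain RewriteAllUses would leave trivial or duplicate PHIs in place;
  // the new entry point runs simplifyPass and deduplicatePass afterwards.
  Updater.RewriteAndOptimizeAllUses(DT);
}
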
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 81deba2..c0147ce 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -433,8 +433,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
   // We are about to replace the branch to exit the region. Remove the original
   // BranchOnCond, if there is any.
   DebugLoc LatchDL = DL;
-  if (!LatchVPBB->empty() &&
-      match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue()))) {
+  if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
     LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
     LatchVPBB->getTerminator()->eraseFromParent();
   }
@@ -480,8 +479,7 @@ static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
 
 static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
                                PredicatedScalarEvolution &PSE, Loop *TheLoop) {
-  VPDominatorTree VPDT;
-  VPDT.recalculate(Plan);
+  VPDominatorTree VPDT(Plan);
 
   auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
   canonicalHeaderAndLatch(HeaderVPBB, VPDT);
@@ -623,8 +621,7 @@ void VPlanTransforms::addMiddleCheck(VPlan &Plan,
 }
 
 void VPlanTransforms::createLoopRegions(VPlan &Plan) {
-  VPDominatorTree VPDT;
-  VPDT.recalculate(Plan);
+  VPDominatorTree VPDT(Plan);
 
   for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
     if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
       createLoopRegion(Plan, HeaderVPB);
@@ -875,8 +872,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
           Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
     auto *VPBB = cast<VPBasicBlock>(VPB);
     for (auto &R : *VPBB) {
-      if (R.mayWriteToMemory() &&
-          !match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+      if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
         return false;
     }
   }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 577432f..44506f5a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -39,7 +39,6 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> {
   using Base = DominatorTreeBase<VPBlockBase, false>;
 
 public:
-  VPDominatorTree() = default;
   explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); }
 
   /// Returns true if \p A properly dominates \p B.
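
Dropping the default constructor leaves construction-equals-computation as the only way to obtain a VPDominatorTree, which is why every `VPDominatorTree VPDT; VPDT.recalculate(Plan);` pair above collapses to one line. A generic sketch of the idiom with stand-in types (not LLVM code):

struct Plan {};

class DomTree {
public:
  // No default constructor: the tree can never exist in an empty,
  // easy-to-misuse state; it is always computed from a Plan up front.
  explicit DomTree(Plan &P) { recalculate(P); }

  void recalculate(Plan &P) {
    (void)P; // Rebuild the tree from P (elided).
    Valid = true;
  }

  bool isValid() const { return Valid; }

private:
  bool Valid = false;
};

// Usage mirrors the new call sites: DomTree DT(ThePlan);
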
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 555efea..b42b049 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -344,6 +344,10 @@ m_Freeze(const Op0_t &Op0) {
   return m_VPInstruction<Instruction::Freeze>(Op0);
 }
 
+inline VPInstruction_match<VPInstruction::BranchOnCond> m_BranchOnCond() {
+  return m_VPInstruction<VPInstruction::BranchOnCond>();
+}
+
 template <typename Op0_t>
 inline VPInstruction_match<VPInstruction::BranchOnCond, Op0_t>
 m_BranchOnCond(const Op0_t &Op0) {
@@ -374,6 +378,10 @@ m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
   return m_VPInstruction<VPInstruction::ActiveLaneMask>(Op0, Op1, Op2);
 }
 
+inline VPInstruction_match<VPInstruction::BranchOnCount> m_BranchOnCount() {
+  return m_VPInstruction<VPInstruction::BranchOnCount>();
+}
+
 template <typename Op0_t, typename Op1_t>
 inline VPInstruction_match<VPInstruction::BranchOnCount, Op0_t, Op1_t>
 m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 8e916772..2368d18 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1154,7 +1154,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
   case VPInstruction::ExtractPenultimateElement:
     if (VF == ElementCount::getScalable(1))
       return InstructionCost::getInvalid();
-    LLVM_FALLTHROUGH;
+    [[fallthrough]];
   default:
     // TODO: Compute cost other VPInstructions once the legacy cost model has
     // been retired.
@@ -2855,7 +2855,7 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
   case ExpressionTypes::ExtNegatedMulAccReduction:
     assert(Opcode == Instruction::Add && "Unexpected opcode");
     Opcode = Instruction::Sub;
-    LLVM_FALLTHROUGH;
+    [[fallthrough]];
   case ExpressionTypes::ExtMulAccReduction: {
     return Ctx.TTI.getMulAccReductionCost(
         cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
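
The LLVM_FALLTHROUGH replacements here (and in ObjCARCOpts.cpp above) are purely mechanical: the macro predates C++17, whose standard attribute now works on every compiler LLVM supports. The pattern, in a self-contained example:

// Returns a bucket for Kind; case 0 deliberately shares case 1's handling
// when Extra is set, and the attribute documents that to the compiler, which
// would otherwise warn about the implicit fallthrough.
int classify(int Kind, bool Extra) {
  switch (Kind) {
  case 0:
    if (!Extra)
      return -1;
    [[fallthrough]]; // intentional: 0-with-Extra behaves like 1
  case 1:
    return 1;
  default:
    return 0;
  }
}
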
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9bb8820..40b7e8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1658,7 +1658,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
   auto *Term = &ExitingVPBB->back();
   VPValue *Cond;
   ScalarEvolution &SE = *PSE.getSE();
-  if (match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) ||
+  if (match(Term, m_BranchOnCount()) ||
       match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask(
                       m_VPValue(), m_VPValue(), m_VPValue()))))) {
     // Try to simplify the branch condition if TC <= VF * UF when the latch
@@ -1909,8 +1909,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
 
 bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
                                                   VPBuilder &LoopBuilder) {
-  VPDominatorTree VPDT;
-  VPDT.recalculate(Plan);
+  VPDominatorTree VPDT(Plan);
 
   SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
   for (VPRecipeBase &R :
@@ -1992,6 +1991,13 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
         .Case<VPWidenIntrinsicRecipe>([](auto *I) {
           return std::make_pair(true, I->getVectorIntrinsicID());
         })
+        .Case<VPVectorPointerRecipe>([](auto *I) {
+          // For recipes that do not directly map to LLVM IR instructions,
+          // assign opcodes after the last VPInstruction opcode (which is also
+          // after the last IR Instruction opcode), based on the VPDefID.
+          return std::make_pair(false,
+                                VPInstruction::OpsEnd + 1 + I->getVPDefID());
+        })
         .Default([](auto *) { return std::nullopt; });
   }
 
@@ -2015,11 +2021,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
   static bool canHandle(const VPSingleDefRecipe *Def) {
     // We can extend the list of handled recipes in the future,
     // provided we account for the data embedded in them while checking for
-    // equality or hashing. We assign VPVectorEndPointerRecipe the GEP opcode,
-    // as it is essentially a GEP with different semantics.
-    auto C = isa<VPVectorPointerRecipe>(Def)
-                 ? std::make_pair(false, Instruction::GetElementPtr)
-                 : getOpcodeOrIntrinsicID(Def);
+    // equality or hashing.
+    auto C = getOpcodeOrIntrinsicID(Def);
 
     // The issue with (Insert|Extract)Value is that the index of the
     // insert/extract is not a proper operand in LLVM IR, and hence also not in
@@ -2058,6 +2061,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
         vputils::isSingleScalar(L) != vputils::isSingleScalar(R) ||
         !equal(L->operands(), R->operands()))
       return false;
+    assert(getOpcodeOrIntrinsicID(L) && getOpcodeOrIntrinsicID(R) &&
+           "must have valid opcode info for both recipes");
     if (auto *LFlags = dyn_cast<VPRecipeWithIRFlags>(L))
       if (LFlags->hasPredicate() &&
           LFlags->getPredicate() !=
@@ -3021,8 +3026,7 @@ void VPlanTransforms::createInterleaveGroups(
   // Interleave memory: for each Interleave Group we marked earlier as relevant
   // for this VPlan, replace the Recipes widening its memory instructions with a
   // single VPInterleaveRecipe at its insertion point.
-  VPDominatorTree VPDT;
-  VPDT.recalculate(Plan);
+  VPDominatorTree VPDT(Plan);
   for (const auto *IG : InterleaveGroups) {
     auto *Start =
         cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
@@ -3398,9 +3402,8 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
 
   VPBuilder Builder(LatchVPBB->getTerminator());
   VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
-  assert(
-      match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond(m_VPValue())) &&
-      "Terminator must be be BranchOnCond");
+  assert(match(EarlyExitingVPBB->getTerminator(), m_BranchOnCond()) &&
+         "Terminator must be be BranchOnCond");
   VPValue *CondOfEarlyExitingVPBB =
       EarlyExitingVPBB->getTerminator()->getOperand(0);
   auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB
@@ -3662,8 +3665,7 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
     return;
 
 #ifndef NDEBUG
-  VPDominatorTree VPDT;
-  VPDT.recalculate(Plan);
+  VPDominatorTree VPDT(Plan);
 #endif
 
   SmallVector<VPValue *> VPValues;
@@ -4009,8 +4011,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
   unsigned VFMinVal = VF.getKnownMinValue();
   SmallVector<VPInterleaveRecipe *> StoreGroups;
   for (auto &R : *VectorLoop->getEntryBasicBlock()) {
-    if (isa<VPCanonicalIVPHIRecipe>(&R) ||
-        match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+    if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount()))
       continue;
 
     if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) &&
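
The CSE keying change deserves a closer look: recipes without an IR opcode used to borrow Instruction::GetElementPtr, which could in principle collide with genuine GEP recipes. A self-contained sketch of the new scheme with stand-in constants (the real code uses VPInstruction::OpsEnd and getVPDefID()):

#include <cstdio>
#include <utility>

// Stand-in values only; the real enums come from Instruction and VPInstruction.
enum : unsigned { VPInstructionOpsEnd = 90 };

// (IsIntrinsic, ID): intrinsic IDs get their own namespace via the bool;
// recipes with no IR opcode are mapped past the last VPInstruction opcode,
// so they can never alias a real opcode.
using CSEKey = std::pair<bool, unsigned>;

CSEKey keyForIROpcode(unsigned Opcode) { return {false, Opcode}; }
CSEKey keyForIntrinsic(unsigned IntrinsicID) { return {true, IntrinsicID}; }
CSEKey keyForRecipe(unsigned VPDefID) {
  return {false, VPInstructionOpsEnd + 1 + VPDefID}; // disjoint range
}

int main() {
  // Even if a VPDefID numerically equals an IR opcode, the offset keeps the
  // two keys distinct.
  std::printf("%d\n", keyForIROpcode(5) != keyForRecipe(5)); // prints 1
  return 0;
}
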
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 5e7f19f..1c4adfc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -259,8 +259,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
 
 /// Handle non-header-phi recipes.
 void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
-  if (match(&R, m_BranchOnCond(m_VPValue())) ||
-      match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+  if (match(&R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount())))
     return;
 
   if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 013ea2e..5262af6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -24,6 +24,7 @@
 #define DEBUG_TYPE "loop-vectorize"
 
 using namespace llvm;
+using namespace VPlanPatternMatch;
 
 namespace {
 class VPlanVerifier {
@@ -198,7 +199,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
       }
       // EVLIVIncrement is only used by EVLIV & BranchOnCount.
       // Having more than two users is unexpected.
-      using namespace llvm::VPlanPatternMatch;
       if (I->getOpcode() != VPInstruction::Broadcast &&
           I->getNumUsers() != 1 &&
          (I->getNumUsers() != 2 ||
@@ -479,8 +479,7 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
       }
 
       auto *LastInst = dyn_cast<VPInstruction>(std::prev(Exiting->end()));
-      if (!LastInst || (LastInst->getOpcode() != VPInstruction::BranchOnCount &&
-                        LastInst->getOpcode() != VPInstruction::BranchOnCond)) {
+      if (!match(LastInst, m_CombineOr(m_BranchOnCond(), m_BranchOnCount()))) {
         errs() << "VPlan vector loop exit must end with BranchOnCount or "
                   "BranchOnCond VPInstruction\n";
         return false;
@@ -490,8 +489,7 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
 }
 
 bool llvm::verifyVPlanIsValid(const VPlan &Plan, bool VerifyLate) {
-  VPDominatorTree VPDT;
-  VPDT.recalculate(const_cast<VPlan &>(Plan));
+  VPDominatorTree VPDT(const_cast<VPlan &>(Plan));
   VPTypeAnalysis TypeInfo(Plan);
   VPlanVerifier Verifier(VPDT, TypeInfo, VerifyLate);
   return Verifier.verify(Plan);
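
Finally, the zero-operand m_BranchOnCond()/m_BranchOnCount() matchers combined via m_CombineOr mirror the style of llvm/IR/PatternMatch.h. A self-contained analogue using the IR matchers (the VPlan matchers themselves are internal to the vectorizer, so the IR ones stand in here):

#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Matches either an add or a sub in one expression, the same way the VPlan
// code now matches either terminator kind with a single m_CombineOr.
bool isAddOrSub(Value *V) {
  return match(V, m_CombineOr(m_Add(m_Value(), m_Value()),
                              m_Sub(m_Value(), m_Value())));
}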