diff options
author | Philip Reames <listmail@philipreames.com> | 2022-03-15 16:19:55 -0700 |
---|---|---|
committer | Philip Reames <listmail@philipreames.com> | 2022-03-15 16:36:15 -0700 |
commit | 1cfa986d68e2f04854ef30c432b8aa28e13a9706 (patch) | |
tree | c21c8e957846f7eab4b5c5ad5bf4af8f438abc82 /llvm/lib | |
parent | b97856c4cfe7efb13887d5691002a7aa38440924 (diff) | |
download | llvm-1cfa986d68e2f04854ef30c432b8aa28e13a9706.zip llvm-1cfa986d68e2f04854ef30c432b8aa28e13a9706.tar.gz llvm-1cfa986d68e2f04854ef30c432b8aa28e13a9706.tar.bz2 |
[SLP] Optionally preserve MemorySSA
This initial patch adds code to preserve MemorySSA through a run of the SLP vectorizer. The eventual plan is to use MemorySSA to accelerate SLP's memory dependence checking, but we're a long way from that. In particular, this patch is correct, but really slow. It's being landed so that we can work incrementally in tree, not because it's expected to be useful to anyone just yet.
The broader effort is being tracked in https://github.com/llvm/llvm-project/issues/54256. It's worth noting explicitly that this may not work out, and if not, we will be reverting all of the MSSA support in SLP at some point in the next few weeks.
Differential Revision: https://reviews.llvm.org/D117926
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 90 |
1 files changed, 84 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5bce8bed..90114f9 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -41,6 +41,8 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -166,6 +168,10 @@ static cl::opt<bool> ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")); +static cl::opt<bool> EnableMSSAInSLPVectorizer( + "enable-mssa-in-slp-vectorizer", cl::Hidden, cl::init(false), + cl::desc("Enable MemorySSA for SLPVectorizer in new pass manager")); + // Limit the number of alias checks. The limit is chosen so that // it has no negative effect on the llvm benchmarks. static const unsigned AliasedCheckLimit = 10; @@ -789,9 +795,10 @@ public: BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB, - const DataLayout *DL, OptimizationRemarkEmitter *ORE) + MemorySSA *MSSA, const DataLayout *DL, OptimizationRemarkEmitter *ORE) : BatchAA(*Aa), F(Func), SE(Se), TTI(Tti), TLI(TLi), LI(Li), - DT(Dt), AC(AC), DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) { + DT(Dt), AC(AC), DB(DB), MSSA(MSSA), DL(DL), ORE(ORE), + Builder(Se->getContext()) { CodeMetrics::collectEphemeralValues(F, AC, EphValues); // Use the vector register size specified by the target unless overridden // by a command-line option. 
@@ -2979,6 +2986,7 @@ private: DominatorTree *DT; AssumptionCache *AC; DemandedBits *DB; + MemorySSA *MSSA; const DataLayout *DL; OptimizationRemarkEmitter *ORE; @@ -3091,6 +3099,13 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits { } // end namespace llvm BoUpSLP::~BoUpSLP() { + if (MSSA) { + MemorySSAUpdater MSSAU(MSSA); + for (const auto &Pair : DeletedInstructions) { + if (auto *Access = MSSA->getMemoryAccess(Pair.first)) + MSSAU.removeMemoryAccess(Access); + } + } for (const auto &Pair : DeletedInstructions) { // Replace operands of ignored instructions with Undefs in case if they were // marked for deletion. @@ -6786,6 +6801,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { auto *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace()); Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy); LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign()); + if (MSSA) { + MemorySSAUpdater MSSAU(MSSA); + auto *Access = MSSA->getMemoryAccess(LI); + assert(Access); + MemoryUseOrDef *NewAccess = + MSSAU.createMemoryAccessBefore(V, Access->getDefiningAccess(), + Access); + MSSAU.insertUse(cast<MemoryUse>(NewAccess), true); + } Value *NewV = propagateMetadata(V, E->Scalars); ShuffleBuilder.addInversedMask(E->ReorderIndices); ShuffleBuilder.addMask(E->ReuseShuffleIndices); @@ -7035,6 +7059,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign()); NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment); } + + if (MSSA) { + MemorySSAUpdater MSSAU(MSSA); + auto *Access = MSSA->getMemoryAccess(LI); + assert(Access); + MemoryUseOrDef *NewAccess = + MSSAU.createMemoryAccessAfter(NewLI, Access->getDefiningAccess(), + Access); + MSSAU.insertUse(cast<MemoryUse>(NewAccess), true); + } + Value *V = propagateMetadata(NewLI, E->Scalars); ShuffleBuilder.addInversedMask(E->ReorderIndices); @@ -7060,6 +7095,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { StoreInst 
*ST = Builder.CreateAlignedStore(VecValue, VecPtr, SI->getAlign()); + if (MSSA) { + MemorySSAUpdater MSSAU(MSSA); + auto *Access = MSSA->getMemoryAccess(SI); + assert(Access); + MemoryUseOrDef *NewAccess = + MSSAU.createMemoryAccessAfter(ST, Access->getDefiningAccess(), + Access); + MSSAU.insertDef(cast<MemoryDef>(NewAccess), true); + } + // The pointer operand uses an in-tree scalar, so add the new BitCast or // StoreInst to ExternalUses to make sure that an extract will be // generated in the future. @@ -8024,6 +8069,15 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { BS->initialFillReadyList(ReadyInsts); Instruction *LastScheduledInst = BS->ScheduleEnd; + MemoryAccess *MemInsertPt = nullptr; + if (MSSA) { + for (auto I = LastScheduledInst->getIterator(); I != BS->BB->end(); I++) { + if (auto *Access = MSSA->getMemoryAccess(&*I)) { + MemInsertPt = Access; + break; + } + } + } // Do the "real" scheduling. while (!ReadyInsts.empty()) { @@ -8035,9 +8089,24 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { for (ScheduleData *BundleMember = picked; BundleMember; BundleMember = BundleMember->NextInBundle) { Instruction *pickedInst = BundleMember->Inst; - if (pickedInst->getNextNode() != LastScheduledInst) + if (pickedInst->getNextNode() != LastScheduledInst) { pickedInst->moveBefore(LastScheduledInst); + if (MSSA) { + MemorySSAUpdater MSSAU(MSSA); + if (auto *Access = MSSA->getMemoryAccess(pickedInst)) { + if (MemInsertPt) + MSSAU.moveBefore(Access, cast<MemoryUseOrDef>(MemInsertPt)); + else + MSSAU.moveToPlace(Access, BS->BB, + MemorySSA::InsertionPlace::End); + } + } + } + LastScheduledInst = pickedInst; + if (MSSA) + if (auto *Access = MSSA->getMemoryAccess(LastScheduledInst)) + MemInsertPt = Access; } BS->schedule(picked, ReadyInsts); @@ -8383,7 +8452,7 @@ struct SLPVectorizer : public FunctionPass { auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits(); auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); - return 
Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE); + return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, /*MSSA*/nullptr, ORE); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -8417,13 +8486,21 @@ PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &A auto *AC = &AM.getResult<AssumptionAnalysis>(F); auto *DB = &AM.getResult<DemandedBitsAnalysis>(F); auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F); + auto *MSSA = EnableMSSAInSLPVectorizer ? + &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : (MemorySSA*)nullptr; - bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE); + bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, MSSA, ORE); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); + if (MSSA) { +#ifdef EXPENSIVE_CHECKS + MSSA->verifyMemorySSA(); +#endif + PA.preserve<MemorySSAAnalysis>(); + } return PA; } @@ -8432,6 +8509,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, TargetLibraryInfo *TLI_, AAResults *AA_, LoopInfo *LI_, DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_, + MemorySSA *MSSA, OptimizationRemarkEmitter *ORE_) { if (!RunSLPVectorization) return false; @@ -8465,7 +8543,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // Use the bottom up slp vectorizer to construct chains that start with // store instructions. - BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL, ORE_); + BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, MSSA, DL, ORE_); // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to // delete instructions. |