Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroCloner.h                      |   9
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroEarly.cpp                     |   2
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroInternal.h                    |   9
-rw-r--r--  llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp          |   5
-rw-r--r--  llvm/lib/Transforms/Coroutines/SpillUtils.cpp                    |  37
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineInternal.h            |   2
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  |   4
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp         |   5
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemProfUse.cpp               |  55
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp             |  43
-rw-r--r--  llvm/lib/Transforms/Utils/CloneFunction.cpp                      |  67
-rw-r--r--  llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp                     | 105
-rw-r--r--  llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp            |   2
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp                  |  21
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h                            |   1
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp              |   6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h               |   1
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp                |  25
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp                  |   3
19 files changed, 253 insertions(+), 149 deletions(-)
diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index 26ec4f3..e05fe28 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -1,3 +1,4 @@
+//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -19,9 +20,7 @@
#include "llvm/Transforms/Coroutines/CoroInstr.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-namespace llvm {
-
-namespace coro {
+namespace llvm::coro {
enum class CloneKind {
/// The shared resume function for a switch lowering.
@@ -149,8 +148,6 @@ public:
}
};
-} // end namespace coro
-
-} // end namespace llvm
+} // end namespace llvm::coro
#endif // LLVM_LIB_TRANSFORMS_COROUTINES_COROCLONER_H
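
Note: the CoroCloner.h hunks above (and the CoroInternal.h ones below) adopt C++17 nested namespace definitions. A minimal before/after sketch of the idiom, illustrative rather than taken from the commit:

    // Before: two nested blocks, two closing braces to keep matched.
    namespace llvm {
    namespace coro {
    enum class CloneKind { /* ... */ };
    } // end namespace coro
    } // end namespace llvm

    // After (C++17): one block, one closing brace.
    namespace llvm::coro {
    enum class CloneKind { /* ... */ };
    } // end namespace llvm::coro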
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index 471b9eb..cdb5852 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -38,7 +38,7 @@ public:
AnyResumeFnPtrTy(PointerType::getUnqual(Context)) {}
void lowerEarlyIntrinsics(Function &F);
};
-}
+} // namespace
// Replace a direct call to coro.resume or coro.destroy with an indirect call to
// an address returned by coro.subfn.addr intrinsic. This is done so that
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 52f4ffe..cc47a55 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -16,11 +16,7 @@
#include "llvm/Transforms/Coroutines/CoroInstr.h"
#include "llvm/Transforms/Coroutines/CoroShape.h"
-namespace llvm {
-
-class CallGraph;
-
-namespace coro {
+namespace llvm::coro {
bool isSuspendBlock(BasicBlock *BB);
bool declaresAnyIntrinsic(const Module &M);
@@ -61,7 +57,6 @@ void normalizeCoroutine(Function &F, coro::Shape &Shape,
CallInst *createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
TargetTransformInfo &TTI,
ArrayRef<Value *> Arguments, IRBuilder<> &);
-} // End namespace coro.
-} // End namespace llvm
+} // End namespace llvm::coro
#endif
diff --git a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp
index 6aaabca..f2444da 100644
--- a/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp
+++ b/llvm/lib/Transforms/Coroutines/MaterializationUtils.cpp
@@ -137,8 +137,7 @@ struct RematGraph {
} // namespace
-namespace llvm {
-template <> struct GraphTraits<RematGraph *> {
+template <> struct llvm::GraphTraits<RematGraph *> {
using NodeRef = RematGraph::RematNode *;
using ChildIteratorType = RematGraph::RematNode **;
@@ -149,8 +148,6 @@ template <> struct GraphTraits<RematGraph *> {
static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
};
-} // end namespace llvm
-
// For each instruction identified as materializable across the suspend point,
// and its associated DAG of other rematerializable instructions,
// recreate the DAG of instructions after the suspend point.
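
Note: the GraphTraits hunk relies on the C++17 rule that an explicit specialization may be defined outside its namespace via a qualified-id, making the surrounding namespace llvm block unnecessary. A minimal sketch of the same pattern, with hypothetical names:

    namespace app {
    template <typename G> struct Traits; // primary template
    } // namespace app

    struct MyGraph {};

    // C++17: specialize app::Traits without reopening namespace app.
    template <> struct app::Traits<MyGraph *> {
      using NodeRef = MyGraph *;
    };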
diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
index e474c07..81fe0c9 100644
--- a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
+++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp
@@ -16,11 +16,8 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-namespace llvm {
-
-namespace coro {
-
-namespace {
+using namespace llvm;
+using namespace llvm::coro;
typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet;
@@ -71,7 +68,7 @@ static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
/// This happens during the all-instructions iteration, so it must not
/// delete the call.
static Instruction *
-lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const coro::Shape &Shape,
+lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const Shape &Shape,
SmallVectorImpl<Instruction *> &DeadInsts) {
IRBuilder<> Builder(AI);
auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
@@ -450,10 +447,8 @@ static void collectFrameAlloca(AllocaInst *AI, const coro::Shape &Shape,
Visitor.getMayWriteBeforeCoroBegin());
}
-} // namespace
-
-void collectSpillsFromArgs(SpillInfo &Spills, Function &F,
- const SuspendCrossingInfo &Checker) {
+void coro::collectSpillsFromArgs(SpillInfo &Spills, Function &F,
+ const SuspendCrossingInfo &Checker) {
// Collect the spills for arguments and other not-materializable values.
for (Argument &A : F.args())
for (User *U : A.users())
@@ -461,7 +456,7 @@ void collectSpillsFromArgs(SpillInfo &Spills, Function &F,
Spills[&A].push_back(cast<Instruction>(U));
}
-void collectSpillsAndAllocasFromInsts(
+void coro::collectSpillsAndAllocasFromInsts(
SpillInfo &Spills, SmallVector<AllocaInfo, 8> &Allocas,
SmallVector<Instruction *, 4> &DeadInstructions,
SmallVector<CoroAllocaAllocInst *, 4> &LocalAllocas, Function &F,
@@ -516,8 +511,8 @@ void collectSpillsAndAllocasFromInsts(
}
}
-void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F,
- const SuspendCrossingInfo &Checker) {
+void coro::collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F,
+ const SuspendCrossingInfo &Checker) {
// We don't want the layout of coroutine frame to be affected
// by debug information. So we only choose to salvage dbg.values for
// whose value is already in the frame.
@@ -535,10 +530,9 @@ void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F,
/// Async and Retcon{Once} conventions assume that all spill uses can be sunk
/// after the coro.begin intrinsic.
-void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom,
- CoroBeginInst *CoroBegin,
- coro::SpillInfo &Spills,
- SmallVectorImpl<coro::AllocaInfo> &Allocas) {
+void coro::sinkSpillUsesAfterCoroBegin(
+ const DominatorTree &Dom, CoroBeginInst *CoroBegin, coro::SpillInfo &Spills,
+ SmallVectorImpl<coro::AllocaInfo> &Allocas) {
SmallSetVector<Instruction *, 32> ToMove;
SmallVector<Instruction *, 32> Worklist;
@@ -582,8 +576,9 @@ void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom,
Inst->moveBefore(InsertPt->getIterator());
}
-BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def,
- const DominatorTree &DT) {
+BasicBlock::iterator coro::getSpillInsertionPt(const coro::Shape &Shape,
+ Value *Def,
+ const DominatorTree &DT) {
BasicBlock::iterator InsertPt;
if (auto *Arg = dyn_cast<Argument>(Def)) {
// For arguments, we will place the store instruction right after
@@ -625,7 +620,3 @@ BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def,
return InsertPt;
}
-
-} // End namespace coro.
-
-} // End namespace llvm.
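
Note: SpillUtils.cpp now defines its public entry points with coro::-qualified names plus using-directives instead of wrapping the whole file in namespace blocks; a qualified out-of-line definition also lets the compiler reject a definition whose signature drifts from the declaration. A minimal sketch of the pattern, with a hypothetical interface:

    // Header, inside namespace llvm::coro:
    //   void collectThings(Function &F);

    using namespace llvm;
    using namespace llvm::coro;

    // File-local helper: internal linkage via static.
    static bool isInteresting(const Instruction &I) { return !I.use_empty(); }

    // Out-of-line definition of the declared interface, qualified:
    void coro::collectThings(Function &F) {
      for (Instruction &I : instructions(F))
        if (isInteresting(I))
          ; // ...
    }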
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 7071876..943c223 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -471,7 +471,6 @@ private:
Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable,
unsigned Depth = 0);
-public:
/// Create `select C, S1, S2`. Use only when the profile cannot be calculated
/// from existing profile metadata: if the Function has profiles, this will
/// set the profile of this select to "unknown".
@@ -484,6 +483,7 @@ public:
return Sel;
}
+public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 63e24a0..a330bb7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -110,8 +110,8 @@ static Value *simplifyShiftSelectingPackedElement(Instruction *I,
ShrAmt->getName() + ".z");
// There is no existing !prof metadata we can derive the !prof metadata for
// this select.
- Value *Select = IC.createSelectInstWithUnknownProfile(ShrAmtZ, Lower, Upper);
- IC.Builder.Insert(Select);
+ Value *Select = IC.Builder.CreateSelectWithUnknownProfile(ShrAmtZ, Lower,
+ Upper, DEBUG_TYPE);
Select->takeName(I);
return Select;
}
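
Note: this hunk swaps an InstCombine-local helper for IRBuilder::CreateSelectWithUnknownProfile, which creates and inserts the select and marks its branch weights as unknown, attributed to the named pass. A hedged usage sketch, with Builder, Cond, TrueV, and FalseV assumed in scope:

    // DEBUG_TYPE names the pass responsible for the unknown-profile marker.
    Value *Sel = Builder.CreateSelectWithUnknownProfile(Cond, TrueV, FalseV,
                                                        DEBUG_TYPE, "sel");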
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 82ac903..3f11cae 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1690,6 +1690,11 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
// 2) (fp_binop ({s|u}itofp x), FpC)
// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
+ // Don't perform the fold on vectors, as the integer operation may be much
+ // more expensive than the float operation in that case.
+ if (BO.getType()->isVectorTy())
+ return nullptr;
+
std::array<Value *, 2> IntOps = {nullptr, nullptr};
Constant *Op1FpC = nullptr;
// Check for:
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index c86092b..a6ec6c1 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
@@ -194,6 +195,30 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
}
}
+static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar,
+ AnnotationKind Kind) {
+ assert(Kind != llvm::memprof::AnnotationKind::AnnotationOK &&
+ "Should not handle AnnotationOK here");
+ SmallString<32> Reason;
+ switch (Kind) {
+ case llvm::memprof::AnnotationKind::ExplicitSection:
+ ++NumOfMemProfExplicitSectionGlobalVars;
+ Reason.append("explicit section name");
+ break;
+ case llvm::memprof::AnnotationKind::DeclForLinker:
+ Reason.append("linker declaration");
+ break;
+ case llvm::memprof::AnnotationKind::ReservedName:
+ Reason.append("name starts with `llvm.`");
+ break;
+ default:
+ llvm_unreachable("Unexpected annotation kind");
+ }
+ LLVM_DEBUG(dbgs() << "Skip annotation for " << GVar.getName() << " due to "
+ << Reason << ".\n");
+ return;
+}
+
struct AllocMatchInfo {
uint64_t TotalSize = 0;
AllocationType AllocType = AllocationType::None;
@@ -775,29 +800,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::none();
}
-// Returns true iff the global variable has custom section either by
-// __attribute__((section("name")))
-// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
-// or #pragma clang section directives
-// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
-static bool hasExplicitSectionName(const GlobalVariable &GVar) {
- if (GVar.hasSection())
- return true;
-
- auto Attrs = GVar.getAttributes();
- if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
- Attrs.hasAttribute("relro-section") ||
- Attrs.hasAttribute("rodata-section"))
- return true;
- return false;
-}
-
bool MemProfUsePass::annotateGlobalVariables(
Module &M, const memprof::DataAccessProfData *DataAccessProf) {
if (!AnnotateStaticDataSectionPrefix || M.globals().empty())
return false;
if (!DataAccessProf) {
+ M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 0U);
M.getContext().diagnose(DiagnosticInfoPGOProfile(
MemoryProfileFileName.data(),
StringRef("Data access profiles not found in memprof. Ignore "
@@ -805,6 +814,7 @@ bool MemProfUsePass::annotateGlobalVariables(
DS_Warning));
return false;
}
+ M.addModuleFlag(Module::Warning, "EnableDataAccessProf", 1U);
bool Changed = false;
// Iterate all global variables in the module and annotate them based on
@@ -815,13 +825,9 @@ bool MemProfUsePass::annotateGlobalVariables(
for (GlobalVariable &GVar : M.globals()) {
assert(!GVar.getSectionPrefix().has_value() &&
"GVar shouldn't have section prefix yet");
- if (GVar.isDeclarationForLinker())
- continue;
-
- if (hasExplicitSectionName(GVar)) {
- ++NumOfMemProfExplicitSectionGlobalVars;
- LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName()
- << " has explicit section name. Skip annotating.\n");
+ auto Kind = llvm::memprof::getAnnotationKind(GVar);
+ if (Kind != llvm::memprof::AnnotationKind::AnnotationOK) {
+ HandleUnsupportedAnnotationKinds(GVar, Kind);
continue;
}
@@ -831,7 +837,6 @@ bool MemProfUsePass::annotateGlobalVariables(
// TODO: Track string content hash in the profiles and compute it inside the
// compiler to categeorize the hotness string literals.
if (Name.starts_with(".str")) {
-
LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
continue;
}
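
Note: the pass now records whether data-access profiles were available via the EnableDataAccessProf module flag (1 when found, 0 when missing). A sketch of how a downstream consumer might query the flag; the actual in-tree consumer may differ:

    bool HasDataAccessProf = false;
    if (auto *MD = mdconst::extract_or_null<ConstantInt>(
            M.getModuleFlag("EnableDataAccessProf")))
      HasDataAccessProf = MD->getZExtValue();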
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 56e0569..7cae94eb 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -1295,6 +1295,24 @@ public:
return commonAlignment(InitialAlign, ElementSizeInBits / 8);
}
+ IntegerType *getIndexType(Value *Ptr) const {
+ return cast<IntegerType>(DL.getIndexType(Ptr->getType()));
+ }
+
+ Value *getIndex(Value *Ptr, uint64_t V) const {
+ return ConstantInt::get(getIndexType(Ptr), V);
+ }
+
+ Value *castToIndexType(Value *Ptr, Value *V, IRBuilder<> &Builder) const {
+ assert(isa<IntegerType>(V->getType()) &&
+ "Attempted to cast non-integral type to integer index");
+ // In case the data layout's index type differs in width from the type of
+ // the value we're given, truncate or zero extend to the appropriate width.
+ // We zero extend here as indices are unsigned.
+ return Builder.CreateZExtOrTrunc(V, getIndexType(Ptr),
+ V->getName() + ".cast");
+ }
+
/// Load a matrix with \p Shape starting at \p Ptr and using \p Stride between
/// vectors.
MatrixTy loadMatrix(Type *Ty, Value *Ptr, MaybeAlign MAlign, Value *Stride,
@@ -1304,6 +1322,7 @@ public:
Type *VecTy = FixedVectorType::get(EltTy, Shape.getStride());
Value *EltPtr = Ptr;
MatrixTy Result;
+ Stride = castToIndexType(Ptr, Stride, Builder);
for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) {
Value *GEP = computeVectorAddr(
EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I),
@@ -1325,14 +1344,14 @@ public:
ShapeInfo ResultShape, Type *EltTy,
IRBuilder<> &Builder) {
Value *Offset = Builder.CreateAdd(
- Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);
+ Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I);
Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
auto *TileTy = FixedVectorType::get(EltTy, ResultShape.NumRows *
ResultShape.NumColumns);
return loadMatrix(TileTy, TileStart, Align,
- Builder.getInt64(MatrixShape.getStride()), IsVolatile,
+ getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile,
ResultShape, Builder);
}
@@ -1363,14 +1382,15 @@ public:
MaybeAlign MAlign, bool IsVolatile, ShapeInfo MatrixShape,
Value *I, Value *J, Type *EltTy, IRBuilder<> &Builder) {
Value *Offset = Builder.CreateAdd(
- Builder.CreateMul(J, Builder.getInt64(MatrixShape.getStride())), I);
+ Builder.CreateMul(J, getIndex(MatrixPtr, MatrixShape.getStride())), I);
Value *TileStart = Builder.CreateGEP(EltTy, MatrixPtr, Offset);
auto *TileTy = FixedVectorType::get(EltTy, StoreVal.getNumRows() *
StoreVal.getNumColumns());
storeMatrix(TileTy, StoreVal, TileStart, MAlign,
- Builder.getInt64(MatrixShape.getStride()), IsVolatile, Builder);
+ getIndex(MatrixPtr, MatrixShape.getStride()), IsVolatile,
+ Builder);
}
/// Store matrix \p StoreVal starting at \p Ptr and using \p Stride between
@@ -1380,6 +1400,7 @@ public:
IRBuilder<> &Builder) {
auto *VType = cast<FixedVectorType>(Ty);
Value *EltPtr = Ptr;
+ Stride = castToIndexType(Ptr, Stride, Builder);
for (auto Vec : enumerate(StoreVal.vectors())) {
Value *GEP = computeVectorAddr(
EltPtr,
@@ -2011,18 +2032,17 @@ public:
const unsigned TileM = std::min(M - K, unsigned(TileSize));
MatrixTy A =
loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
- LShape, Builder.getInt64(I), Builder.getInt64(K),
+ LShape, getIndex(APtr, I), getIndex(APtr, K),
{TileR, TileM}, EltType, Builder);
MatrixTy B =
loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
- RShape, Builder.getInt64(K), Builder.getInt64(J),
+ RShape, getIndex(BPtr, K), getIndex(BPtr, J),
{TileM, TileC}, EltType, Builder);
emitMatrixMultiply(Res, A, B, Builder, true, false,
getFastMathFlags(MatMul));
}
storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
- Builder.getInt64(I), Builder.getInt64(J), EltType,
- Builder);
+ getIndex(CPtr, I), getIndex(CPtr, J), EltType, Builder);
}
}
@@ -2254,15 +2274,14 @@ public:
/// Lower load instructions.
MatrixTy VisitLoad(LoadInst *Inst, const ShapeInfo &SI, Value *Ptr,
IRBuilder<> &Builder) {
- return LowerLoad(Inst, Ptr, Inst->getAlign(),
- Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI,
- Builder);
+ return LowerLoad(Inst, Ptr, Inst->getAlign(), getIndex(Ptr, SI.getStride()),
+ Inst->isVolatile(), SI, Builder);
}
MatrixTy VisitStore(StoreInst *Inst, const ShapeInfo &SI, Value *StoredVal,
Value *Ptr, IRBuilder<> &Builder) {
return LowerStore(Inst, StoredVal, Ptr, Inst->getAlign(),
- Builder.getInt64(SI.getStride()), Inst->isVolatile(), SI,
+ getIndex(Ptr, SI.getStride()), Inst->isVolatile(), SI,
Builder);
}
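
Note: the getIndex/castToIndexType helpers above normalize stride and offset operands to the data layout's pointer index type rather than hard-coding i64; on a 32-bit target the index type is i32, so a wider stride is truncated and a narrower one zero-extended (indices are unsigned). A standalone sketch of the same normalization, with DL, Ptr, Stride, and Builder assumed in scope:

    IntegerType *IdxTy = cast<IntegerType>(DL.getIndexType(Ptr->getType()));
    Value *StrideIdx =
        Builder.CreateZExtOrTrunc(Stride, IdxTy, Stride->getName() + ".cast");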
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index b187208..32924e7 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -44,7 +44,7 @@ using namespace llvm;
STATISTIC(RemappedAtomMax, "Highest global NextAtomGroup (after mapping)");
void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) {
- auto CurGroup = DL->getAtomGroup();
+ uint64_t CurGroup = DL->getAtomGroup();
if (!CurGroup)
return;
@@ -62,21 +62,20 @@ void llvm::mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap) {
RemappedAtomMax = std::max<uint64_t>(NewGroup, RemappedAtomMax);
}
-namespace {
-void collectDebugInfoFromInstructions(const Function &F,
- DebugInfoFinder &DIFinder) {
+static void collectDebugInfoFromInstructions(const Function &F,
+ DebugInfoFinder &DIFinder) {
const Module *M = F.getParent();
- if (M) {
- // Inspect instructions to process e.g. DILexicalBlocks of inlined functions
- for (const auto &I : instructions(F))
- DIFinder.processInstruction(*M, I);
- }
+ if (!M)
+ return;
+ // Inspect instructions to process e.g. DILexicalBlocks of inlined functions
+ for (const Instruction &I : instructions(F))
+ DIFinder.processInstruction(*M, I);
}
// Create a predicate that matches the metadata that should be identity mapped
// during function cloning.
-MetadataPredicate createIdentityMDPredicate(const Function &F,
- CloneFunctionChangeType Changes) {
+static MetadataPredicate
+createIdentityMDPredicate(const Function &F, CloneFunctionChangeType Changes) {
if (Changes >= CloneFunctionChangeType::DifferentModule)
return [](const Metadata *MD) { return false; };
@@ -107,7 +106,6 @@ MetadataPredicate createIdentityMDPredicate(const Function &F,
return false;
};
}
-} // namespace
/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
@@ -213,10 +211,9 @@ void llvm::CloneFunctionMetadataInto(Function &NewFunc, const Function &OldFunc,
const MetadataPredicate *IdentityMD) {
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc.getAllMetadata(MDs);
- for (auto MD : MDs) {
- NewFunc.addMetadata(MD.first,
- *MapMetadata(MD.second, VMap, RemapFlag, TypeMapper,
- Materializer, IdentityMD));
+ for (const auto &[Kind, MD] : MDs) {
+ NewFunc.addMetadata(Kind, *MapMetadata(MD, VMap, RemapFlag, TypeMapper,
+ Materializer, IdentityMD));
}
}
@@ -235,7 +232,6 @@ void llvm::CloneFunctionBodyInto(Function &NewFunc, const Function &OldFunc,
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
for (const BasicBlock &BB : OldFunc) {
-
// Create a new basic block and copy instructions into it!
BasicBlock *CBB =
CloneBasicBlock(&BB, VMap, NameSuffix, &NewFunc, CodeInfo);
@@ -321,7 +317,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Cloning is always a Module level operation, since Metadata needs to be
// cloned.
- const auto RemapFlag = RF_None;
+ const RemapFlags RemapFlag = RF_None;
CloneFunctionMetadataInto(*NewFunc, *OldFunc, VMap, RemapFlag, TypeMapper,
Materializer, &IdentityMD);
@@ -346,8 +342,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// visiting the metadata attached to global values, which would allow this
// code to be deleted. Alternatively, perhaps give responsibility for this
// update to CloneFunctionInto's callers.
- auto *NewModule = NewFunc->getParent();
- auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
+ Module *NewModule = NewFunc->getParent();
+ NamedMDNode *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu");
// Avoid multiple insertions of the same DICompileUnit to NMD.
SmallPtrSet<const void *, 8> Visited(llvm::from_range, NMD->operands());
@@ -355,7 +351,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// the function (e.g. as instructions' scope).
DebugInfoFinder DIFinder;
collectDebugInfoFromInstructions(*OldFunc, DIFinder);
- for (auto *Unit : DIFinder.compile_units()) {
+ for (DICompileUnit *Unit : DIFinder.compile_units()) {
MDNode *MappedUnit =
MapMetadata(Unit, VMap, RF_None, TypeMapper, Materializer);
if (Visited.insert(MappedUnit).second)
@@ -821,17 +817,16 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
--PredCount[Pred];
// Figure out how many entries to remove from each PHI.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- ++PredCount[PN->getIncomingBlock(i)];
+ for (BasicBlock *Pred : PN->blocks())
+ ++PredCount[Pred];
// At this point, the excess predecessor entries are positive in the
// map. Loop over all of the PHIs and remove excess predecessor
// entries.
BasicBlock::iterator I = NewBB->begin();
for (; (PN = dyn_cast<PHINode>(I)); ++I) {
- for (const auto &PCI : PredCount) {
- BasicBlock *Pred = PCI.first;
- for (unsigned NumToRemove = PCI.second; NumToRemove; --NumToRemove)
+ for (const auto &[Pred, Count] : PredCount) {
+ for ([[maybe_unused]] unsigned _ : llvm::seq<unsigned>(Count))
PN->removeIncomingValue(Pred, false);
}
}
@@ -866,8 +861,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// As phi-nodes have been now remapped, allow incremental simplification of
// newly-cloned instructions.
const DataLayout &DL = NewFunc->getDataLayout();
- for (const auto &BB : *OldFunc) {
- for (const auto &I : BB) {
+ for (const BasicBlock &BB : *OldFunc) {
+ for (const Instruction &I : BB) {
auto *NewI = dyn_cast_or_null<Instruction>(VMap.lookup(&I));
if (!NewI)
continue;
@@ -997,8 +992,8 @@ void llvm::CloneAndPruneFunctionInto(
void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks,
ValueToValueMapTy &VMap) {
// Rewrite the code to refer to itself.
- for (auto *BB : Blocks) {
- for (auto &Inst : *BB) {
+ for (BasicBlock *BB : Blocks) {
+ for (Instruction &Inst : *BB) {
RemapDbgRecordRange(Inst.getModule(), Inst.getDbgRecordRange(), VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
RemapInstruction(&Inst, VMap,
@@ -1151,9 +1146,9 @@ void llvm::cloneNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes,
StringRef Ext, LLVMContext &Context) {
MDBuilder MDB(Context);
- for (auto *ScopeList : NoAliasDeclScopes) {
- for (const auto &MDOperand : ScopeList->operands()) {
- if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) {
+ for (MDNode *ScopeList : NoAliasDeclScopes) {
+ for (const MDOperand &MDOp : ScopeList->operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
AliasScopeNode SNANode(MD);
std::string Name;
@@ -1177,7 +1172,7 @@ void llvm::adaptNoAliasScopes(Instruction *I,
auto CloneScopeList = [&](const MDNode *ScopeList) -> MDNode * {
bool NeedsReplacement = false;
SmallVector<Metadata *, 8> NewScopeList;
- for (const auto &MDOp : ScopeList->operands()) {
+ for (const MDOperand &MDOp : ScopeList->operands()) {
if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
if (auto *NewMD = ClonedScopes.lookup(MD)) {
NewScopeList.push_back(NewMD);
@@ -1193,12 +1188,12 @@ void llvm::adaptNoAliasScopes(Instruction *I,
};
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
- if (auto *NewScopeList = CloneScopeList(Decl->getScopeList()))
+ if (MDNode *NewScopeList = CloneScopeList(Decl->getScopeList()))
Decl->setScopeList(NewScopeList);
auto replaceWhenNeeded = [&](unsigned MD_ID) {
if (const MDNode *CSNoAlias = I->getMetadata(MD_ID))
- if (auto *NewScopeList = CloneScopeList(CSNoAlias))
+ if (MDNode *NewScopeList = CloneScopeList(CSNoAlias))
I->setMetadata(MD_ID, NewScopeList);
};
replaceWhenNeeded(LLVMContext::MD_noalias);
diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
index d7bf791..fb39fdd 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -11,11 +11,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
@@ -112,7 +112,7 @@ struct BBValueInfo {
void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
SmallVectorImpl<PHINode *> *InsertedPHIs) {
DenseMap<BasicBlock *, BBValueInfo> BBInfos;
- for (auto &R : Rewrites) {
+ for (RewriteInfo &R : Rewrites) {
BBInfos.clear();
// Compute locations for new phi-nodes.
@@ -145,7 +145,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
BBInfos[BB].LiveOutValue = V;
// We've computed IDF, now insert new phi-nodes there.
- for (auto *FrontierBB : IDFBlocks) {
+ for (BasicBlock *FrontierBB : IDFBlocks) {
IRBuilder<> B(FrontierBB, FrontierBB->begin());
PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name);
BBInfos[FrontierBB].LiveInValue = PN;
@@ -156,7 +156,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
// IsLiveOut indicates whether we are computing live-out values (true) or
// live-in values (false).
auto ComputeValue = [&](BasicBlock *BB, bool IsLiveOut) -> Value * {
- auto *BBInfo = &BBInfos[BB];
+ BBValueInfo *BBInfo = &BBInfos[BB];
if (IsLiveOut && BBInfo->LiveOutValue)
return BBInfo->LiveOutValue;
@@ -187,7 +187,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
if (!V)
V = UndefValue::get(R.Ty);
- for (auto *BBInfo : Stack)
+ for (BBValueInfo *BBInfo : Stack)
// Loop above can insert new entries into the BBInfos map: assume the
// map shouldn't grow due to [1] and BBInfo references are valid.
BBInfo->LiveInValue = V;
@@ -196,7 +196,7 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
};
// Fill in arguments of the inserted PHIs.
- for (auto *BB : IDFBlocks) {
+ for (BasicBlock *BB : IDFBlocks) {
auto *PHI = cast<PHINode>(&BB->front());
for (BasicBlock *Pred : PredCache.get(BB))
PHI->addIncoming(ComputeValue(Pred, /*IsLiveOut=*/true), Pred);
@@ -222,3 +222,96 @@ void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT,
}
}
}
+
+// Perform a single pass of simplification over the worklist of PHIs.
+// This should be called after RewriteAllUses() because simplifying PHIs
+// immediately after creation would require updating all references to those
+// PHIs in the BBValueInfo structures, which would necessitate additional
+// reference tracking overhead.
+static void simplifyPass(MutableArrayRef<PHINode *> Worklist,
+ const DataLayout &DL) {
+ for (PHINode *&PHI : Worklist) {
+ if (Value *Simplified = simplifyInstruction(PHI, DL)) {
+ PHI->replaceAllUsesWith(Simplified);
+ PHI->eraseFromParent();
+ PHI = nullptr; // Mark as removed.
+ }
+ }
+}
+
+#ifndef NDEBUG // Should this be under EXPENSIVE_CHECKS?
+// New PHI nodes should not reference one another but they may reference
+// themselves or existing PHI nodes, and existing PHI nodes may reference new
+// PHI nodes.
+static bool
+PHIAreRefEachOther(const iterator_range<BasicBlock::phi_iterator> NewPHIs) {
+ SmallPtrSet<PHINode *, 8> NewPHISet;
+ for (PHINode &PN : NewPHIs)
+ NewPHISet.insert(&PN);
+ for (PHINode &PHI : NewPHIs) {
+ for (Value *V : PHI.incoming_values()) {
+ PHINode *IncPHI = dyn_cast<PHINode>(V);
+ if (IncPHI && IncPHI != &PHI && NewPHISet.contains(IncPHI))
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+static bool replaceIfIdentical(PHINode &PHI, PHINode &ReplPHI) {
+ if (!PHI.isIdenticalToWhenDefined(&ReplPHI))
+ return false;
+ PHI.replaceAllUsesWith(&ReplPHI);
+ PHI.eraseFromParent();
+ return true;
+}
+
+bool EliminateNewDuplicatePHINodes(BasicBlock *BB,
+ BasicBlock::phi_iterator FirstExistingPN) {
+ assert(!PHIAreRefEachOther(make_range(BB->phis().begin(), FirstExistingPN)));
+
+ // Deduplicate new PHIs first to reduce the number of comparisons on the
+ // following new -> existing pass.
+ bool Changed = false;
+ for (auto I = BB->phis().begin(); I != FirstExistingPN; ++I) {
+ for (auto J = std::next(I); J != FirstExistingPN;) {
+ Changed |= replaceIfIdentical(*J++, *I);
+ }
+ }
+
+ // Iterate over existing PHIs and replace identical new PHIs.
+ for (PHINode &ExistingPHI : make_range(FirstExistingPN, BB->phis().end())) {
+ auto I = BB->phis().begin();
+ assert(I != FirstExistingPN); // Should be at least one new PHI.
+ do {
+ Changed |= replaceIfIdentical(*I++, ExistingPHI);
+ } while (I != FirstExistingPN);
+ if (BB->phis().begin() == FirstExistingPN)
+ return Changed;
+ }
+ return Changed;
+}
+
+static void deduplicatePass(ArrayRef<PHINode *> Worklist) {
+ SmallDenseMap<BasicBlock *, unsigned> BBs;
+ for (PHINode *PHI : Worklist) {
+ if (PHI)
+ ++BBs[PHI->getParent()];
+ }
+
+ for (auto [BB, NumNewPHIs] : BBs) {
+ auto FirstExistingPN = std::next(BB->phis().begin(), NumNewPHIs);
+ EliminateNewDuplicatePHINodes(BB, FirstExistingPN);
+ }
+}
+
+void SSAUpdaterBulk::RewriteAndOptimizeAllUses(DominatorTree &DT) {
+ SmallVector<PHINode *, 4> PHIs;
+ RewriteAllUses(&DT, &PHIs);
+ if (PHIs.empty())
+ return;
+
+ simplifyPass(PHIs, PHIs.front()->getParent()->getDataLayout());
+ deduplicatePass(PHIs);
+}
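
Note: RewriteAndOptimizeAllUses wraps RewriteAllUses with a cleanup over the freshly inserted PHIs: simplifyPass folds trivially redundant ones, then deduplicatePass removes per-block duplicates. A hedged sketch of the bulk-updater flow using the existing SSAUpdaterBulk API (the variable names are illustrative):

    SSAUpdaterBulk Updater;
    unsigned Var = Updater.AddVariable("val", ValTy);
    Updater.AddAvailableValue(Var, DefBB, DefVal);
    for (Use *U : UsesToRewrite)
      Updater.AddUse(Var, U);
    // New in this change: rewrite uses, then simplify + deduplicate PHIs.
    Updater.RewriteAndOptimizeAllUses(DT);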
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 45cee1e..9035e58 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -526,7 +526,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Recognize the canonical representation of an unsimplifed urem.
const SCEV *URemLHS = nullptr;
const SCEV *URemRHS = nullptr;
- if (SE.matchURem(S, URemLHS, URemRHS)) {
+ if (match(S, m_scev_URem(m_SCEV(URemLHS), m_SCEV(URemRHS), SE))) {
Value *LHS = expand(URemLHS);
Value *RHS = expand(URemRHS);
return InsertBinop(Instruction::URem, LHS, RHS, SCEV::FlagAnyWrap,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a6f4bec..88af2cf 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10659,7 +10659,8 @@ class InstructionsCompatibilityAnalysis {
static bool isSupportedOpcode(const unsigned Opcode) {
return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
Opcode == Instruction::Shl || Opcode == Instruction::SDiv ||
- Opcode == Instruction::UDiv;
+ Opcode == Instruction::UDiv || Opcode == Instruction::And ||
+ Opcode == Instruction::Or || Opcode == Instruction::Xor;
}
/// Identifies the best candidate value, which represents main opcode
@@ -10984,6 +10985,9 @@ public:
case Instruction::Shl:
case Instruction::SDiv:
case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
break;
default:
@@ -19456,7 +19460,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
assert(getNumElements(Cond->getType()) == TrueNumElements &&
"Cannot vectorize Instruction::Select");
- Value *V = Builder.CreateSelect(Cond, True, False);
+ Value *V =
+ Builder.CreateSelectWithUnknownProfile(Cond, True, False, DEBUG_TYPE);
V = FinalShuffle(V, E);
E->VectorizedValue = V;
@@ -23576,18 +23581,19 @@ class HorizontalReduction {
switch (Kind) {
case RecurKind::Or: {
if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy))
- return Builder.CreateSelect(
+ return Builder.CreateSelectWithUnknownProfile(
LHS, ConstantInt::getAllOnesValue(CmpInst::makeCmpResultType(OpTy)),
- RHS, Name);
+ RHS, DEBUG_TYPE, Name);
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
}
case RecurKind::And: {
if (UseSelect && OpTy == CmpInst::makeCmpResultType(OpTy))
- return Builder.CreateSelect(
+ return Builder.CreateSelectWithUnknownProfile(
LHS, RHS,
- ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)), Name);
+ ConstantInt::getNullValue(CmpInst::makeCmpResultType(OpTy)),
+ DEBUG_TYPE, Name);
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
@@ -23608,7 +23614,8 @@ class HorizontalReduction {
if (UseSelect) {
CmpInst::Predicate Pred = llvm::getMinMaxReductionPredicate(Kind);
Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS, Name);
- return Builder.CreateSelect(Cmp, LHS, RHS, Name);
+ return Builder.CreateSelectWithUnknownProfile(Cmp, LHS, RHS, DEBUG_TYPE,
+ Name);
}
[[fallthrough]];
case RecurKind::FMax:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fb696be..8ca3bed 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1064,6 +1064,7 @@ public:
ResumeForEpilogue,
/// Returns the value for vscale.
VScale,
+ OpsEnd = VScale,
};
/// Returns true if this VPInstruction generates scalar values for all lanes.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 3e65d42..c0147ce 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -479,8 +479,7 @@ static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
PredicatedScalarEvolution &PSE, Loop *TheLoop) {
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
canonicalHeaderAndLatch(HeaderVPBB, VPDT);
@@ -622,8 +621,7 @@ void VPlanTransforms::addMiddleCheck(VPlan &Plan,
}
void VPlanTransforms::createLoopRegions(VPlan &Plan) {
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
createLoopRegion(Plan, HeaderVPB);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 577432f..44506f5a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -39,7 +39,6 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> {
using Base = DominatorTreeBase<VPBlockBase, false>;
public:
- VPDominatorTree() = default;
explicit VPDominatorTree(VPlan &Plan) { recalculate(Plan); }
/// Returns true if \p A properly dominates \p B.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index af755ca..40b7e8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1909,8 +1909,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
VPBuilder &LoopBuilder) {
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
for (VPRecipeBase &R :
@@ -1992,6 +1991,13 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
return std::make_pair(true, I->getVectorIntrinsicID());
})
+ .Case<VPVectorPointerRecipe>([](auto *I) {
+ // For recipes that do not directly map to LLVM IR instructions,
+ // assign opcodes after the last VPInstruction opcode (which is also
+ // after the last IR Instruction opcode), based on the VPDefID.
+ return std::make_pair(false,
+ VPInstruction::OpsEnd + 1 + I->getVPDefID());
+ })
.Default([](auto *) { return std::nullopt; });
}
@@ -2015,11 +2021,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
static bool canHandle(const VPSingleDefRecipe *Def) {
// We can extend the list of handled recipes in the future,
// provided we account for the data embedded in them while checking for
- // equality or hashing. We assign VPVectorEndPointerRecipe the GEP opcode,
- // as it is essentially a GEP with different semantics.
- auto C = isa<VPVectorPointerRecipe>(Def)
- ? std::make_pair(false, Instruction::GetElementPtr)
- : getOpcodeOrIntrinsicID(Def);
+ // equality or hashing.
+ auto C = getOpcodeOrIntrinsicID(Def);
// The issue with (Insert|Extract)Value is that the index of the
// insert/extract is not a proper operand in LLVM IR, and hence also not in
@@ -2058,6 +2061,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
vputils::isSingleScalar(L) != vputils::isSingleScalar(R) ||
!equal(L->operands(), R->operands()))
return false;
+ assert(getOpcodeOrIntrinsicID(L) && getOpcodeOrIntrinsicID(R) &&
+ "must have valid opcode info for both recipes");
if (auto *LFlags = dyn_cast<VPRecipeWithIRFlags>(L))
if (LFlags->hasPredicate() &&
LFlags->getPredicate() !=
@@ -3021,8 +3026,7 @@ void VPlanTransforms::createInterleaveGroups(
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
for (const auto *IG : InterleaveGroups) {
auto *Start =
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getMember(0)));
@@ -3661,8 +3665,7 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
return;
#ifndef NDEBUG
- VPDominatorTree VPDT;
- VPDT.recalculate(Plan);
+ VPDominatorTree VPDT(Plan);
#endif
SmallVector<VPValue *> VPValues;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 752e03d..5262af6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -489,8 +489,7 @@ bool VPlanVerifier::verify(const VPlan &Plan) {
}
bool llvm::verifyVPlanIsValid(const VPlan &Plan, bool VerifyLate) {
- VPDominatorTree VPDT;
- VPDT.recalculate(const_cast<VPlan &>(Plan));
+ VPDominatorTree VPDT(const_cast<VPlan &>(Plan));
VPTypeAnalysis TypeInfo(Plan);
VPlanVerifier Verifier(VPDT, TypeInfo, VerifyLate);
return Verifier.verify(Plan);