Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp  22
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp  6
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp  16
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp  24
-rw-r--r--  llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp  3
-rw-r--r--  llvm/lib/Transforms/ObjCARC/PtrState.h  3
-rw-r--r--  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp  36
-rw-r--r--  llvm/lib/Transforms/Scalar/GVNSink.cpp  48
-rw-r--r--  llvm/lib/Transforms/Scalar/GuardWidening.cpp  6
-rw-r--r--  llvm/lib/Transforms/Scalar/IndVarSimplify.cpp  54
-rw-r--r--  llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp  4
-rw-r--r--  llvm/lib/Transforms/Scalar/LICM.cpp  16
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp  11
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopPassManager.cpp  4
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp  3
-rw-r--r--  llvm/lib/Transforms/Scalar/NewGVN.cpp  6
-rw-r--r--  llvm/lib/Transforms/Scalar/Reassociate.cpp  42
-rw-r--r--  llvm/lib/Transforms/Scalar/SROA.cpp  6
-rw-r--r--  llvm/lib/Transforms/Scalar/Scalarizer.cpp  8
-rw-r--r--  llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp  7
-rw-r--r--  llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp  4
-rw-r--r--  llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp  4
-rw-r--r--  llvm/lib/Transforms/Utils/LoopUnroll.cpp  1
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  30
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp  8
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h  27
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp  1
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp  15
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanHelpers.h  2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h  36
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp  3
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp  24
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp  112
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp  6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp  13
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h  2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp  15
37 files changed, 362 insertions, 266 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index f166fef..cf7e450 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -153,26 +153,23 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
- BranchProbability MinBranchProbability(
- static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
- MinBlockCounterExecution);
-
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
- auto Prob = BranchProbability::getBranchProbability(
- BFI.getBlockFreq(CB->getParent()).getFrequency(),
- BFI.getEntryFreq().getFrequency());
+ auto BlockFreq = BFI.getBlockFreq(CB->getParent()).getFrequency();
+ auto EntryFreq = BFI.getEntryFreq().getFrequency();
+ uint64_t MinFreq =
+ static_cast<uint64_t>(EntryFreq * CoroElideBranchRatio);
- if (Prob < MinBranchProbability) {
+ if (BlockFreq < MinFreq) {
ORE.emit([&]() {
return OptimizationRemarkMissed(
DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' not elided in '"
<< ore::NV("caller", Caller->getName())
- << "' because of low probability: "
- << ore::NV("probability", Prob) << " (threshold: "
- << ore::NV("threshold", MinBranchProbability) << ")";
+ << "' because of low frequency: "
+ << ore::NV("block_freq", BlockFreq)
+ << " (threshold: " << ore::NV("min_freq", MinFreq) << ")";
});
continue;
}
@@ -188,7 +185,8 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' elided in '" << ore::NV("caller", Caller->getName())
- << "' (probability: " << ore::NV("probability", Prob) << ")";
+ << "' (block_freq: " << ore::NV("block_freq", BlockFreq)
+ << ")";
});
FAM.invalidate(*Caller, PreservedAnalyses::none());
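
The hunk above replaces a BranchProbability comparison with a raw block-frequency comparison. A minimal sketch of the algebra it relies on (function name hypothetical): a probability of BlockFreq/EntryFreq is below a ratio R exactly when BlockFreq < EntryFreq * R, up to integer truncation, so the threshold can be scaled once instead of constructing two BranchProbability objects.

  #include <cstdint>
  // Prob = BlockFreq / EntryFreq; Prob < R  <=>  BlockFreq < EntryFreq * R.
  static bool isColdCallsite(uint64_t BlockFreq, uint64_t EntryFreq, double R) {
    return BlockFreq < static_cast<uint64_t>(EntryFreq * R);
  }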
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 4c9b10a..cdc559b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -156,9 +156,9 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
Type *Ty = CI.getType();
- if (auto *SrcC = dyn_cast<Constant>(Src))
- if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
- return replaceInstUsesWith(CI, Res);
+ if (Value *Res =
+ simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
+ return replaceInstUsesWith(CI, Res);
// Try to eliminate a cast of a cast.
if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 511bca4..6e17801 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -605,17 +605,16 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
return Mapping;
}
-namespace llvm {
-void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
- bool IsKasan, uint64_t *ShadowBase,
- int *MappingScale, bool *OrShadowOffset) {
+void llvm::getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
+ bool IsKasan, uint64_t *ShadowBase,
+ int *MappingScale, bool *OrShadowOffset) {
auto Mapping = getShadowMapping(TargetTriple, LongSize, IsKasan);
*ShadowBase = Mapping.Offset;
*MappingScale = Mapping.Scale;
*OrShadowOffset = Mapping.OrShadowOffset;
}
-void removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) {
+void llvm::removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) {
// Sanitizer checks read from shadow, which invalidates memory(argmem: *).
//
// This is not only true for sanitized functions, because AttrInfer can
@@ -668,8 +667,6 @@ ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel,
AccessSizeIndex(AccessSizeIndex), IsWrite(IsWrite),
CompileKernel(CompileKernel) {}
-} // namespace llvm
-
static uint64_t getRedzoneSizeForScale(int MappingScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
@@ -677,11 +674,10 @@ static uint64_t getRedzoneSizeForScale(int MappingScale) {
}
static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
- if (TargetTriple.isOSEmscripten()) {
+ if (TargetTriple.isOSEmscripten())
return kAsanEmscriptenCtorAndDtorPriority;
- } else {
+ else
return kAsanCtorAndDtorPriority;
- }
}
static Twine genName(StringRef suffix) {
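
The hunks above apply a common LLVM cleanup: define previously declared functions with a qualified name instead of reopening namespace llvm around the definitions. A hedged sketch of why the qualified form is preferred (names hypothetical): the compiler requires a matching prior declaration, so signature drift becomes a hard error rather than silently introducing a new overload.

  namespace lib { void helper(int); }   // declaration (normally in a header)
  void lib::helper(int X) { (void)X; }  // qualified definition: must match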
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 7c78eb3..72e8e50 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -396,9 +396,8 @@ class CHR {
} // end anonymous namespace
-static inline
-raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
- const CHRStats &Stats) {
+[[maybe_unused]] static inline raw_ostream &operator<<(raw_ostream &OS,
+ const CHRStats &Stats) {
Stats.print(OS);
return OS;
}
@@ -425,8 +424,8 @@ static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) {
return PSI.isFunctionEntryHot(&F);
}
-static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
- CHRStats *Stats) {
+[[maybe_unused]] static void dumpIR(Function &F, const char *Label,
+ CHRStats *Stats) {
StringRef FuncName = F.getName();
StringRef ModuleName = F.getParent()->getName();
(void)(FuncName); // Unused in release build.
@@ -1622,7 +1621,7 @@ static void insertTrivialPHIs(CHRScope *Scope,
}
// Assert that all the CHR regions of the scope have a biased branch or select.
-static void LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]] static void
assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
#ifndef NDEBUG
auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
@@ -1644,8 +1643,9 @@ assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
// Assert that all the condition values of the biased branches and selects have
// been hoisted to the pre-entry block or outside of the scope.
-static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
- CHRScope *Scope, BasicBlock *PreEntryBlock) {
+[[maybe_unused]] static void
+assertBranchOrSelectConditionHoisted(CHRScope *Scope,
+ BasicBlock *PreEntryBlock) {
CHR_DEBUG(dbgs() << "Biased regions condition values \n");
for (RegInfo &RI : Scope->CHRRegions) {
Region *R = RI.R;
@@ -2007,8 +2007,8 @@ void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
}
}
-static void LLVM_ATTRIBUTE_UNUSED
-dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
+[[maybe_unused]] static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes,
+ const char *Label) {
dbgs() << Label << " " << Scopes.size() << "\n";
for (CHRScope *Scope : Scopes) {
dbgs() << *Scope << "\n";
@@ -2092,8 +2092,6 @@ bool CHR::run() {
return Changed;
}
-namespace llvm {
-
ControlHeightReductionPass::ControlHeightReductionPass() {
parseCHRFilterFiles();
}
@@ -2116,5 +2114,3 @@ PreservedAnalyses ControlHeightReductionPass::run(
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
-
-} // namespace llvm
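
Throughout this file the patch also swaps the compiler-specific LLVM_ATTRIBUTE_UNUSED macro for the standard C++17 attribute. A minimal sketch of the replacement's effect (helper hypothetical): it suppresses unused-entity warnings for functions that are only referenced in debug or assert-enabled builds.

  [[maybe_unused]] static void dumpState(int State) { /* debug-only helper */ }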
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 09db464..386e48f 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -326,8 +326,7 @@ const unsigned BBState::OverflowOccurredValue = 0xffffffff;
namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS,
- BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, BBState &BBState);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/ObjCARC/PtrState.h b/llvm/lib/Transforms/ObjCARC/PtrState.h
index 232db2b..5cc4212 100644
--- a/llvm/lib/Transforms/ObjCARC/PtrState.h
+++ b/llvm/lib/Transforms/ObjCARC/PtrState.h
@@ -47,8 +47,7 @@ enum Sequence {
S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
};
-raw_ostream &operator<<(raw_ostream &OS,
- const Sequence S) LLVM_ATTRIBUTE_UNUSED;
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const Sequence S);
/// Unidirectional information about either a
/// retain-decrement-use-release sequence or release-use-decrement-retain
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 7ad710d..6141b6d 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -77,6 +77,7 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -805,9 +806,8 @@ tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
return nullptr;
}
-namespace {
// Returns true if \p I is an intrinsic that does not read or write memory.
-bool isNoopIntrinsic(Instruction *I) {
+static bool isNoopIntrinsic(Instruction *I) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
@@ -828,7 +828,7 @@ bool isNoopIntrinsic(Instruction *I) {
}
// Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+static bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
Instruction *DI = D->getMemoryInst();
// Calls that only access inaccessible memory cannot read or write any memory
// locations we consider for elimination.
@@ -856,6 +856,8 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return false;
}
+namespace {
+
// A memory location wrapper that represents a MemoryLocation, `MemLoc`,
// defined by `MemDef`.
struct MemoryLocationWrapper {
@@ -889,23 +891,25 @@ struct MemoryDefWrapper {
SmallVector<MemoryLocationWrapper, 1> DefinedLocations;
};
-bool hasInitializesAttr(Instruction *I) {
- CallBase *CB = dyn_cast<CallBase>(I);
- return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
-}
-
struct ArgumentInitInfo {
unsigned Idx;
bool IsDeadOrInvisibleOnUnwind;
ConstantRangeList Inits;
};
+} // namespace
+
+static bool hasInitializesAttr(Instruction *I) {
+ CallBase *CB = dyn_cast<CallBase>(I);
+ return CB && CB->getArgOperandWithAttribute(Attribute::Initializes);
+}
// Return the intersected range list of the initializes attributes of "Args".
// "Args" are call arguments that alias to each other.
// If any argument in "Args" doesn't have dead_on_unwind attr and
// "CallHasNoUnwindAttr" is false, return empty.
-ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
- bool CallHasNoUnwindAttr) {
+static ConstantRangeList
+getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
+ bool CallHasNoUnwindAttr) {
if (Args.empty())
return {};
@@ -925,6 +929,8 @@ ConstantRangeList getIntersectedInitRangeList(ArrayRef<ArgumentInitInfo> Args,
return IntersectedIntervals;
}
+namespace {
+
struct DSEState {
Function &F;
AliasAnalysis &AA;
@@ -2328,10 +2334,11 @@ struct DSEState {
// change state: whether make any change.
bool eliminateDeadDefs(const MemoryDefWrapper &KillingDefWrapper);
};
+} // namespace
// Return true if "Arg" is function local and isn't captured before "CB".
-bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
- EarliestEscapeAnalysis &EA) {
+static bool isFuncLocalAndNotCaptured(Value *Arg, const CallBase *CB,
+ EarliestEscapeAnalysis &EA) {
const Value *UnderlyingObj = getUnderlyingObject(Arg);
return isIdentifiedFunctionLocal(UnderlyingObj) &&
capturesNothing(
@@ -2627,7 +2634,6 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
return MadeChange;
}
-} // end anonymous namespace
//===----------------------------------------------------------------------===//
// DSE Pass
@@ -2728,8 +2734,6 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
-namespace llvm {
-LLVM_ABI FunctionPass *createDeadStoreEliminationPass() {
+LLVM_ABI FunctionPass *llvm::createDeadStoreEliminationPass() {
return new DSELegacyPass();
}
-} // namespace llvm
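
These hunks also enforce the LLVM coding-standard split between anonymous namespaces and static: anonymous namespaces are kept for type definitions only, while file-local functions carry static on the definition itself so their internal linkage is visible where they are written. A small sketch of the resulting shape (names hypothetical):

  namespace {  // types only
  struct Wrapper { int Value; };
  } // namespace
  static int unwrap(const Wrapper &W) { return W.Value; }  // functions: static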
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 1c88532..b9534def 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -73,24 +73,17 @@
#include <utility>
using namespace llvm;
+using namespace llvm::GVNExpression;
#define DEBUG_TYPE "gvn-sink"
STATISTIC(NumRemoved, "Number of instructions removed");
-namespace llvm {
-namespace GVNExpression {
-
LLVM_DUMP_METHOD void Expression::dump() const {
print(dbgs());
dbgs() << "\n";
}
-} // end namespace GVNExpression
-} // end namespace llvm
-
-namespace {
-
static bool isMemoryInst(const Instruction *I) {
return isa<LoadInst>(I) || isa<StoreInst>(I) ||
(isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
@@ -99,6 +92,8 @@ static bool isMemoryInst(const Instruction *I) {
//===----------------------------------------------------------------------===//
+namespace {
+
/// Candidate solution for sinking. There may be different ways to
/// sink instructions, differing in the number of instructions sunk,
/// the number of predecessors sunk from and the number of PHIs
@@ -125,14 +120,6 @@ struct SinkingInstructionCandidate {
}
};
-#ifndef NDEBUG
-raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
- OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
- << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
- return OS;
-}
-#endif
-
//===----------------------------------------------------------------------===//
/// Describes a PHI node that may or may not exist. These track the PHIs
@@ -256,8 +243,18 @@ public:
return Values == Other.Values && Blocks == Other.Blocks;
}
};
+} // namespace
-template <typename ModelledPHI> struct DenseMapInfo {
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS,
+ const SinkingInstructionCandidate &C) {
+ OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
+ << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
+ return OS;
+}
+#endif
+
+template <> struct llvm::DenseMapInfo<ModelledPHI> {
static inline ModelledPHI &getEmptyKey() {
static ModelledPHI Dummy = ModelledPHI::createDummy(0);
return Dummy;
@@ -275,7 +272,9 @@ template <typename ModelledPHI> struct DenseMapInfo {
}
};
-using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
+using ModelledPHISet = DenseSet<ModelledPHI>;
+
+namespace {
//===----------------------------------------------------------------------===//
// ValueTable
@@ -290,7 +289,7 @@ using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with sideeffects.
-class InstructionUseExpr : public GVNExpression::BasicExpression {
+class InstructionUseExpr : public BasicExpression {
unsigned MemoryUseOrder = -1;
bool Volatile = false;
ArrayRef<int> ShuffleMask;
@@ -298,7 +297,7 @@ class InstructionUseExpr : public GVNExpression::BasicExpression {
public:
InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
BumpPtrAllocator &A)
- : GVNExpression::BasicExpression(I->getNumUses()) {
+ : BasicExpression(I->getNumUses()) {
allocateOperands(R, A);
setOpcode(I->getOpcode());
setType(I->getType());
@@ -315,8 +314,8 @@ public:
void setVolatile(bool V) { Volatile = V; }
hash_code getHashValue() const override {
- return hash_combine(GVNExpression::BasicExpression::getHashValue(),
- MemoryUseOrder, Volatile, ShuffleMask);
+ return hash_combine(BasicExpression::getHashValue(), MemoryUseOrder,
+ Volatile, ShuffleMask);
}
template <typename Function> hash_code getHashValue(Function MapFn) {
@@ -332,7 +331,7 @@ using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
class ValueTable {
DenseMap<Value *, uint32_t> ValueNumbering;
- DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
+ DenseMap<Expression *, uint32_t> ExpressionNumbering;
DenseMap<size_t, uint32_t> HashNumbering;
BumpPtrAllocator Allocator;
ArrayRecycler<Value *> Recycler;
@@ -594,6 +593,7 @@ private:
}
}
};
+} // namespace
std::optional<SinkingInstructionCandidate>
GVNSink::analyzeInstructionForSinking(LockstepReverseIterator<false> &LRI,
@@ -851,8 +851,6 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
NumRemoved += Insts.size() - 1;
}
-} // end anonymous namespace
-
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
GVNSink G;
if (!G.run(F))
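
The DenseMapInfo change above is worth noting: the old code declared a fresh primary template named DenseMapInfo inside the anonymous namespace (its parameter merely shadowed the ModelledPHI type name) and had to spell it out in the DenseSet alias. The new code explicitly specializes llvm::DenseMapInfo<ModelledPHI>, so DenseSet<ModelledPHI> finds it through its default template argument. A minimal sketch with a hypothetical key type:

  #include "llvm/ADT/DenseMapInfo.h"
  struct Key { unsigned ID; };
  template <> struct llvm::DenseMapInfo<Key> {
    static inline Key getEmptyKey() { return {~0u}; }
    static inline Key getTombstoneKey() { return {~0u - 1}; }
    static unsigned getHashValue(const Key &K) { return K.ID * 37u; }
    static bool isEqual(const Key &L, const Key &R) { return L.ID == R.ID; }
  };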
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index d99f1eb..ddb99a5 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -75,8 +75,6 @@ static cl::opt<bool>
"expressed as branches by widenable conditions"),
cl::init(true));
-namespace {
-
// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
@@ -130,6 +128,8 @@ findInsertionPointForWideCondition(Instruction *WCOrGuard) {
return std::nullopt;
}
+namespace {
+
class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
@@ -328,7 +328,7 @@ public:
/// The entry point for this pass.
bool run();
};
-}
+} // namespace
static bool isSupportedGuardInstruction(const Instruction *Insn) {
if (isGuard(Insn))
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c327311..7ebcc21 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -117,6 +118,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
+static cl::opt<bool> LoopPredicationTraps(
+ "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
+ cl::desc("Predicate conditions that trap in loops with only local writes"));
+
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
@@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
return Changed;
}
+static bool crashingBBWithoutEffect(const BasicBlock &BB) {
+ return llvm::all_of(BB, [](const Instruction &I) {
+ // TODO: for now this is overly restrictive, to make sure nothing in this
+ // BB can depend on the loop body.
+ // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
+ // load does not have a side effect, but we could have
+ // %a = load ptr, ptr %ptr
+ // %b = load i32, ptr %a
+ // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
+ // dereference by skipping over loop iterations.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->onlyAccessesInaccessibleMemory())
+ return true;
+ }
+ return isa<UnreachableInst>(I);
+ });
+}
+
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// suggestions on how to improve this? I can obviously bail out for outer
// loops, but that seems less than ideal. MemorySSA can find memory writes,
// is that enough for *all* side effects?
+ bool HasThreadLocalSideEffects = false;
for (BasicBlock *BB : L->blocks())
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
- if (I.mayHaveSideEffects())
- return false;
+ if (I.mayHaveSideEffects()) {
+ if (!LoopPredicationTraps)
+ return false;
+ HasThreadLocalSideEffects = true;
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ // Simple stores cannot be observed by other threads.
+ // If HasThreadLocalSideEffects is set, we check
+ // crashingBBWithoutEffect to make sure that the crashing BB cannot
+ // observe them either.
+ if (!SI->isSimple())
+ return false;
+ } else {
+ return false;
+ }
+ }
bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
@@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (HasThreadLocalSideEffects) {
+ const BasicBlock *Unreachable = nullptr;
+ for (const BasicBlock *Succ : BI->successors()) {
+ if (isa<UnreachableInst>(Succ->getTerminator()))
+ Unreachable = Succ;
+ }
+      // An exit BB which has one branch back into the loop and another one
+      // to a trap can still be optimized, because local side effects cannot
+      // be observed in the exit case (the trap). We could be smarter about
+      // this, but for now let's pattern match common cases that directly trap.
+ if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
+ return Changed;
+ }
Value *NewCond;
if (ExitCount == ExactBTC) {
NewCond = L->contains(BI->getSuccessor(0)) ?
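
For context on the IndVarSimplify hunks above: predicateLoopExits previously gave up on any loop containing a side-effecting instruction. Under the new LoopPredicationTraps option it tolerates simple stores, provided every predicated exit branches to a block that only traps, since a trap cannot observe the skipped stores. A hedged source-level illustration of the intended shape (not taken from the patch; the exact attributes on the trap call decide whether crashingBBWithoutEffect accepts it):

  void fill(int *Buf, unsigned N, unsigned Bound) {
    for (unsigned I = 0; I < N; ++I) {
      if (I >= Bound)
        __builtin_trap();  // becomes a call to llvm.trap + unreachable
      Buf[I] = 0;          // a simple (non-volatile, non-atomic) store
    }
  }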
diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
index 3c14036e..6fb8197 100644
--- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
@@ -26,8 +26,6 @@
using namespace llvm;
-namespace llvm {
-
static cl::opt<unsigned>
JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden,
cl::desc("Only split jump tables with size less or "
@@ -43,8 +41,8 @@ static cl::opt<unsigned> FunctionSizeThreshold(
"or equal than this threshold."),
cl::init(50));
+namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
} // end namespace llvm
#define DEBUG_TYPE "jump-table-to-switch"
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 9655173..b2c526b 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -116,8 +116,6 @@ STATISTIC(NumIntAssociationsHoisted,
STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
"reassociated and hoisted out of the loop");
-namespace llvm {
-
/// Memory promotion is enabled by default.
static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
@@ -156,7 +154,7 @@ static cl::opt<unsigned> IntAssociationUpperLimit(
// which may not be precise, since optimizeUses is capped. The result is
// correct, but we may not get as "far up" as possible to get which access is
// clobbering the one queried.
-cl::opt<unsigned> SetLicmMssaOptCap(
+cl::opt<unsigned> llvm::SetLicmMssaOptCap(
"licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
"for faster compile. Caps the MemorySSA clobbering calls."));
@@ -164,15 +162,15 @@ cl::opt<unsigned> SetLicmMssaOptCap(
// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
-cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap(
+cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
"licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
"effect. When MSSA in LICM is enabled, then this is the maximum "
"number of accesses allowed to be present in a loop in order to "
"enable memory promotion."));
+namespace llvm {
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
-
} // end namespace llvm
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
@@ -1120,11 +1118,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
return false;
}
-namespace {
/// Return true if-and-only-if we know how to (mechanically) both hoist and
/// sink a given instruction out of a loop. Does not address legality
/// concerns such as aliasing or speculation safety.
-bool isHoistableAndSinkableInst(Instruction &I) {
+static bool isHoistableAndSinkableInst(Instruction &I) {
// Only these instructions are hoistable/sinkable.
return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
@@ -1136,8 +1133,8 @@ bool isHoistableAndSinkableInst(Instruction &I) {
}
/// Return true if I is the only Instruction with a MemoryAccess in L.
-bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
- const MemorySSAUpdater &MSSAU) {
+static bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
+ const MemorySSAUpdater &MSSAU) {
for (auto *BB : L->getBlocks())
if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
int NotAPhi = 0;
@@ -1151,7 +1148,6 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
}
return true;
}
-}
static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
BatchAAResults &BAA,
diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 73f1942..7706de8 100644
--- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -21,8 +21,7 @@
#define DEBUG_TYPE "loop-bound-split"
-namespace llvm {
-
+using namespace llvm;
using namespace PatternMatch;
namespace {
@@ -358,8 +357,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
IRBuilder<> Builder(&PostLoopPreHeader->front());
// Update phi nodes in header of post-loop.
- bool isExitingLatch =
- (L.getExitingBlock() == L.getLoopLatch()) ? true : false;
+ bool isExitingLatch = L.getExitingBlock() == L.getLoopLatch();
Value *ExitingCondLCSSAPhi = nullptr;
for (PHINode &PN : L.getHeader()->phis()) {
// Create LCSSA phi node in preheader of post-loop.
@@ -472,8 +470,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
- Function &F = *L.getHeader()->getParent();
- (void)F;
+ [[maybe_unused]] Function &F = *L.getHeader()->getParent();
LLVM_DEBUG(dbgs() << "Spliting bound of loop in " << F.getName() << ": " << L
<< "\n");
@@ -486,5 +483,3 @@ PreservedAnalyses LoopBoundSplitPass::run(Loop &L, LoopAnalysisManager &AM,
return getLoopPassPreservedAnalyses();
}
-
-} // end namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 32078b1..7da8586 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -16,8 +16,6 @@
using namespace llvm;
-namespace llvm {
-
/// Explicitly specialize the pass manager's run method to handle loop nest
/// structure updates.
PreservedAnalyses
@@ -185,7 +183,6 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
}
return PA;
}
-} // namespace llvm
void FunctionToLoopPassAdaptor::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
@@ -193,6 +190,7 @@ void FunctionToLoopPassAdaptor::printPipeline(
Pass->printPipeline(OS, MapClassName2PassName);
OS << ')';
}
+
PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
FunctionAnalysisManager &AM) {
// Before we even compute any loop analyses, first run a miniature function
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 448dc2b..f3e6cbf 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -540,8 +540,6 @@ bool LoopVersioningLICM::run(DominatorTree *DT) {
return Changed;
}
-namespace llvm {
-
PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &LAR,
LPMUpdater &U) {
@@ -556,4 +554,3 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 80aa98d..5a8f18a 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -160,9 +160,6 @@ static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true),
//===----------------------------------------------------------------------===//
// Anchor methods.
-namespace llvm {
-namespace GVNExpression {
-
Expression::~Expression() = default;
BasicExpression::~BasicExpression() = default;
CallExpression::~CallExpression() = default;
@@ -171,9 +168,6 @@ StoreExpression::~StoreExpression() = default;
AggregateValueExpression::~AggregateValueExpression() = default;
PHIExpression::~PHIExpression() = default;
-} // end namespace GVNExpression
-} // end namespace llvm
-
namespace {
// Tarjan's SCC finding algorithm with Nuutila's improvements
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index ba58b8e..6d7ce36 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2623,32 +2623,32 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
namespace {
- class ReassociateLegacyPass : public FunctionPass {
- ReassociatePass Impl;
+class ReassociateLegacyPass : public FunctionPass {
+ ReassociatePass Impl;
- public:
- static char ID; // Pass identification, replacement for typeid
+public:
+ static char ID; // Pass identification, replacement for typeid
- ReassociateLegacyPass() : FunctionPass(ID) {
- initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
- }
+ ReassociateLegacyPass() : FunctionPass(ID) {
+ initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
- FunctionAnalysisManager DummyFAM;
- auto PA = Impl.run(F, DummyFAM);
- return !PA.areAllPreserved();
- }
+ FunctionAnalysisManager DummyFAM;
+ auto PA = Impl.run(F, DummyFAM);
+ return !PA.areAllPreserved();
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
- };
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
} // end anonymous namespace
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 578fec7..a692009 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -551,12 +551,10 @@ public:
}
/// Support comparison with a single offset to allow binary searches.
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
- uint64_t RHSOffset) {
+ [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
return LHS.beginOffset() < RHSOffset;
}
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const Slice &RHS) {
+ [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
return LHSOffset < RHS.beginOffset();
}
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index aae5d60..25a531c 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -50,9 +50,7 @@ using namespace llvm;
#define DEBUG_TYPE "scalarizer"
-namespace {
-
-BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
+static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
BasicBlock *BB = Itr->getParent();
if (isa<PHINode>(Itr))
Itr = BB->getFirstInsertionPt();
@@ -76,6 +74,8 @@ using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
+namespace {
+
struct VectorSplit {
// The type of the vector.
FixedVectorType *VecTy = nullptr;
@@ -196,6 +196,7 @@ struct VectorLayout {
// The size of each (non-remainder) fragment in bytes.
uint64_t SplitSize = 0;
};
+} // namespace
static bool isStructOfMatchingFixedVectors(Type *Ty) {
if (!isa<StructType>(Ty))
@@ -268,6 +269,7 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
return Res;
}
+namespace {
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index ebcbd2b..fa66a03 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -149,8 +149,6 @@ bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
return Impl.runImpl(F, TTI);
}
-namespace llvm {
-
bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) {
LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because "
@@ -328,11 +326,11 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
return true;
}
-FunctionPass *createSpeculativeExecutionPass() {
+FunctionPass *llvm::createSpeculativeExecutionPass() {
return new SpeculativeExecutionLegacyPass();
}
-FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
+FunctionPass *llvm::createSpeculativeExecutionIfHasBranchDivergencePass() {
return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true);
}
@@ -362,4 +360,3 @@ void SpeculativeExecutionPass::printPipeline(
OS << "only-if-divergent-target";
OS << '>';
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 7d01709..e94ad19 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -716,8 +716,6 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
return Ret;
}
-namespace llvm {
-
PreservedAnalyses
StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
const DataLayout *DL = &F.getDataLayout();
@@ -735,5 +733,3 @@ StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<TargetIRAnalysis>();
return PA;
}
-
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 1d83ddc..89d41f3e 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -192,7 +192,7 @@ struct AllocaDerivedValueTracker {
SmallPtrSet<Instruction *, 32> AllocaUsers;
SmallPtrSet<Instruction *, 32> EscapePoints;
};
-}
+} // namespace
static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
if (F.callsFunctionThatReturnsTwice())
@@ -967,7 +967,7 @@ struct TailCallElim : public FunctionPass {
/*BFI=*/nullptr);
}
};
-}
+} // namespace
char TailCallElim::ID = 0;
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index b18acea..4fe736a 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -1106,7 +1106,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
Phi.replaceAllUsesWith(RdxResult);
- continue;
}
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b96d29e..280eb20 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7957,9 +7957,9 @@ bool VPRecipeBuilder::getScaledReductions(
auto CollectExtInfo = [this, &Exts, &ExtOpTypes,
&ExtKinds](SmallVectorImpl<Value *> &Ops) -> bool {
for (const auto &[I, OpI] : enumerate(Ops)) {
- auto *CI = dyn_cast<ConstantInt>(OpI);
- if (I > 0 && CI &&
- canConstantBeExtended(CI, ExtOpTypes[0], ExtKinds[0])) {
+ const APInt *C;
+ if (I > 0 && match(OpI, m_APInt(C)) &&
+ canConstantBeExtended(C, ExtOpTypes[0], ExtKinds[0])) {
ExtOpTypes[I] = ExtOpTypes[0];
ExtKinds[I] = ExtKinds[0];
continue;
@@ -8240,14 +8240,14 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// the vector loop or when not folding the tail. In the later case, we know
// that the canonical induction increment will not overflow as the vector trip
// count is >= increment and a multiple of the increment.
+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
if (!HasNUW) {
- auto *IVInc = Plan->getVectorLoopRegion()
- ->getExitingBasicBlock()
- ->getTerminator()
- ->getOperand(0);
- assert(match(IVInc, m_VPInstruction<Instruction::Add>(
- m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
+ auto *IVInc =
+ LoopRegion->getExitingBasicBlock()->getTerminator()->getOperand(0);
+ assert(match(IVInc,
+ m_VPInstruction<Instruction::Add>(
+ m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
}
@@ -8293,7 +8293,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
- VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
HeaderVPBB);
@@ -8377,8 +8376,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
for (VPValue *Old : Old2New.keys())
Old->getDefiningRecipe()->eraseFromParent();
- assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
- !Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
+ assert(isa<VPRegionBlock>(LoopRegion) &&
+ !LoopRegion->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
"VPBasicBlock");
@@ -9326,8 +9325,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
if (ResumePhiIter == MainScalarPH->phis().end()) {
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
ResumePhi = ScalarPHBuilder.createScalarPhi(
- {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
- "vec.epilog.resume.val");
+ {VectorTC,
+ MainPlan.getVectorLoopRegion()->getCanonicalIV()->getStartValue()},
+ {}, "vec.epilog.resume.val");
} else {
ResumePhi = cast<VPPhi>(&*ResumePhiIter);
if (MainScalarPH->begin() == MainScalarPH->end())
@@ -9354,7 +9354,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
- VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+ VPCanonicalIVPHIRecipe *IV = VectorLoop->getCanonicalIV();
// When vectorizing the epilogue loop, the canonical induction needs to be
// adjusted by the value after the main vector loop. Find the resume value
// created during execution of the main VPlan. It must be the first phi in the
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0101942..d167009 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1753,14 +1753,14 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
-bool llvm::canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
TTI::PartialReductionExtendKind ExtKind) {
- APInt TruncatedVal = CI->getValue().trunc(NarrowType->getScalarSizeInBits());
- unsigned WideSize = CI->getType()->getScalarSizeInBits();
+ APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
+ unsigned WideSize = C->getBitWidth();
APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
? TruncatedVal.sext(WideSize)
: TruncatedVal.zext(WideSize);
- return ExtendedVal == CI->getValue();
+ return ExtendedVal == *C;
}
TargetTransformInfo::OperandValueInfo
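
A hedged worked example of the check canConstantBeExtended now performs directly on APInt: truncate the wide constant to the narrow type, re-extend with the requested kind, and require the original value back. For instance, 255 as an i32 survives the i8 round trip under zero-extension but comes back as -1 under sign-extension.

  #include "llvm/ADT/APInt.h"
  static bool fitsAfterExtend(const llvm::APInt &C, unsigned NarrowBits,
                              bool Signed) {
    llvm::APInt Trunc = C.trunc(NarrowBits);
    return (Signed ? Trunc.sext(C.getBitWidth())
                   : Trunc.zext(C.getBitWidth())) == C;
  }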
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 23f5623..0e0b042 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1012,6 +1012,8 @@ public:
// part if scalar. In the latter case, the recipe will be removed during
// unrolling.
ExtractLastElement,
+ // Extracts the last lane for each part from its operand.
+ ExtractLastLanePerPart,
// Extracts the second-to-last lane from its operand or the second-to-last
// part if it is scalar. In the latter case, the recipe will be removed
// during unrolling.
@@ -4058,6 +4060,19 @@ public:
/// Remove the current region from its VPlan, connecting its predecessor to
/// its entry, and its exiting block to its successor.
void dissolveToCFGLoop();
+
+ /// Returns the canonical induction recipe of the region.
+ VPCanonicalIVPHIRecipe *getCanonicalIV() {
+ VPBasicBlock *EntryVPBB = getEntryBasicBlock();
+ if (EntryVPBB->empty()) {
+ // VPlan native path. TODO: Unify both code paths.
+ EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
+ }
+ return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
+ }
+ const VPCanonicalIVPHIRecipe *getCanonicalIV() const {
+ return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
+ }
};
/// VPlan models a candidate for vectorization, encoding various decisions take
@@ -4252,12 +4267,14 @@ public:
BackedgeTakenCount = new VPValue();
return BackedgeTakenCount;
}
+ VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
/// The vector trip count.
VPValue &getVectorTripCount() { return VectorTripCount; }
/// Returns the VF of the vector loop region.
VPValue &getVF() { return VF; };
+ const VPValue &getVF() const { return VF; };
/// Returns VF * UF of the vector loop region.
VPValue &getVFxUF() { return VFxUF; }
@@ -4369,16 +4386,6 @@ public:
LLVM_DUMP_METHOD void dump() const;
#endif
- /// Returns the canonical induction recipe of the vector loop.
- VPCanonicalIVPHIRecipe *getCanonicalIV() {
- VPBasicBlock *EntryVPBB = getVectorLoopRegion()->getEntryBasicBlock();
- if (EntryVPBB->empty()) {
- // VPlan native path.
- EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
- }
- return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
- }
-
VPValue *getSCEVExpansion(const SCEV *S) const {
return SCEVToExpansion.lookup(S);
}
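
The VPlan.h hunks above relocate getCanonicalIV from VPlan to VPRegionBlock, reflecting that the canonical induction is a property of a particular loop region rather than the whole plan. As the later hunks show, call sites now fetch the region first; a minimal sketch of the updated access pattern (assuming the private VPlan.h header is available):

  static VPCanonicalIVPHIRecipe *canonicalIVOf(VPlan &Plan) {
    // The canonical IV now hangs off the vector loop region.
    return Plan.getVectorLoopRegion()->getCanonicalIV();
  }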
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 07bfe7a..f413c63 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -116,6 +116,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement: {
Type *BaseTy = inferScalarType(R->getOperand(0));
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index c0147ce..332791a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -658,9 +658,11 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
}
VPIRMetadata VPBranchWeights;
- auto *Term = VPBuilder(CheckBlockVPBB)
- .createNaryOp(VPInstruction::BranchOnCond, {CondVPV},
- Plan.getCanonicalIV()->getDebugLoc());
+ auto *Term =
+ VPBuilder(CheckBlockVPBB)
+ .createNaryOp(
+ VPInstruction::BranchOnCond, {CondVPV},
+ Plan.getVectorLoopRegion()->getCanonicalIV()->getDebugLoc());
if (AddBranchWeights) {
MDBuilder MDB(Plan.getContext());
MDNode *BranchWeights =
@@ -921,8 +923,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
if (DerivedIV->getNumUsers() == 1 &&
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
- auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
- &Plan.getVectorTripCount());
+ auto *NewSel = Builder.createSelect(
+ AnyNaN, LoopRegion->getCanonicalIV(), &Plan.getVectorTripCount());
DerivedIV->moveAfter(&*Builder.getInsertPoint());
DerivedIV->setOperand(1, NewSel);
continue;
@@ -935,7 +937,8 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
"FMaxNum/FMinNum reduction.\n");
return false;
}
- auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
+ auto *NewSel =
+ Builder.createSelect(AnyNaN, LoopRegion->getCanonicalIV(), VecV);
ResumeR->setOperand(0, NewSel);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 1580a3b..2aaabd9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -474,7 +474,7 @@ public:
/// Check if a constant \p CI can be safely treated as having been extended
/// from a narrower type with the given extension kind.
-bool canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+bool canConstantBeExtended(const APInt *C, Type *NarrowType,
TTI::PartialReductionExtendKind ExtKind);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b42b049..d8203e2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -173,10 +173,10 @@ inline int_pred_ty<is_zero_int> m_ZeroInt() {
/// For vectors, this includes constants with undefined elements.
inline int_pred_ty<is_one> m_One() { return int_pred_ty<is_one>(); }
-struct bind_const_int {
- uint64_t &Res;
+struct bind_apint {
+ const APInt *&Res;
- bind_const_int(uint64_t &Res) : Res(Res) {}
+ bind_apint(const APInt *&Res) : Res(Res) {}
bool match(VPValue *VPV) const {
if (!VPV->isLiveIn())
@@ -188,7 +188,23 @@ struct bind_const_int {
const auto *CI = dyn_cast<ConstantInt>(V);
if (!CI)
return false;
- if (auto C = CI->getValue().tryZExtValue()) {
+ Res = &CI->getValue();
+ return true;
+ }
+};
+
+inline bind_apint m_APInt(const APInt *&C) { return C; }
+
+struct bind_const_int {
+ uint64_t &Res;
+
+ bind_const_int(uint64_t &Res) : Res(Res) {}
+
+ bool match(VPValue *VPV) const {
+ const APInt *APConst;
+ if (!bind_apint(APConst).match(VPV))
+ return false;
+ if (auto C = APConst->tryZExtValue()) {
Res = *C;
return true;
}
@@ -372,6 +388,12 @@ m_ExtractLastElement(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
}
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t>
+m_ExtractLastLanePerPart(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::ExtractLastLanePerPart>(Op0);
+}
+
template <typename Op0_t, typename Op1_t, typename Op2_t>
inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t>
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
@@ -394,6 +416,12 @@ m_AnyOf(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::AnyOf>(Op0);
}
+template <typename Op0_t>
+inline VPInstruction_match<VPInstruction::FirstActiveLane, Op0_t>
+m_FirstActiveLane(const Op0_t &Op0) {
+ return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0);
+}
+
template <unsigned Opcode, typename Op0_t>
inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
return AllRecipe_match<Opcode, Op0_t>(Op0);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 0c27d53..fb17d5d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -168,7 +168,8 @@ void VPPredicator::createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
// non-phi instructions.
auto &Plan = *HeaderVPBB->getPlan();
- auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
+ auto *IV =
+ new VPWidenCanonicalIVRecipe(HeaderVPBB->getParent()->getCanonicalIV());
Builder.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
Builder.insert(IV);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2368d18..7a98c75 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -341,12 +341,12 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
ExtAType = GetExtendKind(ExtAR);
ExtBType = GetExtendKind(ExtBR);
- if (!ExtBR && Widen->getOperand(1)->isLiveIn()) {
- auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue());
- if (canConstantBeExtended(CI, InputTypeA, ExtAType)) {
- InputTypeB = InputTypeA;
- ExtBType = ExtAType;
- }
+ using namespace VPlanPatternMatch;
+ const APInt *C;
+ if (!ExtBR && match(Widen->getOperand(1), m_APInt(C)) &&
+ canConstantBeExtended(C, InputTypeA, ExtAType)) {
+ InputTypeB = InputTypeA;
+ ExtBType = ExtAType;
}
};
@@ -511,6 +511,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
case VPInstruction::Not:
@@ -878,9 +879,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
return ReducedPartRdx;
}
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractLastElement:
case VPInstruction::ExtractPenultimateElement: {
- unsigned Offset = getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
+ unsigned Offset =
+ getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1;
Value *Res;
if (State.VF.isVector()) {
assert(Offset <= State.VF.getKnownMinValue() &&
@@ -1166,6 +1169,7 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractLastElement ||
+ getOpcode() == VPInstruction::ExtractLastLanePerPart ||
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::ExtractLane ||
@@ -1229,6 +1233,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExtractLane:
case VPInstruction::ExtractLastElement:
+ case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ActiveLaneMask:
case VPInstruction::FirstActiveLane:
@@ -1376,6 +1381,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractLastElement:
O << "extract-last-element";
break;
+ case VPInstruction::ExtractLastLanePerPart:
+ O << "extract-last-lane-per-part";
+ break;
case VPInstruction::ExtractPenultimateElement:
O << "extract-penultimate-element";
break;
@@ -2344,7 +2352,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
return false;
auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
- auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
+ auto *CanIV = getParent()->getParent()->getCanonicalIV();
return StartC && StartC->isZero() && StepC && StepC->isOne() &&
getScalarType() == CanIV->getScalarType();
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 40b7e8d..cae9aee8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -501,7 +501,8 @@ static void removeRedundantInductionCasts(VPlan &Plan) {
/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
/// recipe, if it exists.
static void removeRedundantCanonicalIVs(VPlan &Plan) {
- VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
for (VPUser *U : CanonicalIV->users()) {
WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
@@ -512,7 +513,7 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) {
if (!WidenNewIV)
return;
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
@@ -582,8 +583,9 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
FPMathOperator *FPBinOp, Instruction *TruncI,
VPValue *StartV, VPValue *Step, DebugLoc DL,
VPBuilder &Builder) {
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
- VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
+ VPCanonicalIVPHIRecipe *CanonicalIV = LoopRegion->getCanonicalIV();
VPSingleDefRecipe *BaseIV = Builder.createDerivedIV(
Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");
@@ -786,9 +788,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
ScalarEvolution &SE) {
VPValue *Incoming, *Mask;
if (!match(Op, m_VPInstruction<VPInstruction::ExtractLane>(
- m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Mask)),
- m_VPValue(Incoming))))
+ m_FirstActiveLane(m_VPValue(Mask)), m_VPValue(Incoming))))
return nullptr;
auto *WideIV = getOptimizableIVOf(Incoming, SE);
@@ -800,8 +800,9 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
return nullptr;
// Calculate the final index.
- VPValue *EndValue = Plan.getCanonicalIV();
- auto CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ auto *CanonicalIV = LoopRegion->getCanonicalIV();
+ Type *CanonicalIVType = CanonicalIV->getScalarType();
VPBuilder B(cast<VPBasicBlock>(PredVPBB));
DebugLoc DL = cast<VPInstruction>(Op)->getDebugLoc();
@@ -810,7 +811,8 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
Type *FirstActiveLaneType = TypeInfo.inferScalarType(FirstActiveLane);
FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
FirstActiveLaneType, DL);
- EndValue = B.createNaryOp(Instruction::Add, {EndValue, FirstActiveLane}, DL);
+ VPValue *EndValue =
+ B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);
// `getOptimizableIVOf()` always returns the pre-incremented IV, so if it
// changed it means the exit is using the incremented value, so we need to
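// Worked example of the indexing above: if the vector loop exits early when
// CanonicalIV == 12 and the first active lane of the exit mask is 3, the
// exiting scalar iteration is 12 + 3 == 15, so a wide IV's end value is
// start + 15 * step, plus one extra step when the exit uses the incremented
// IV, as the surrounding comment notes.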
@@ -1205,7 +1207,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
// Look through ExtractLastElement (BuildVector ....).
- if (match(&R, m_ExtractLastElement(m_BuildVector()))) {
+ if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
+ m_ExtractLastLanePerPart(m_BuildVector())))) {
auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
@@ -1271,13 +1274,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
- if (match(Def, m_ExtractLastElement(m_Broadcast(m_VPValue(A))))) {
+ if (match(Def,
+ m_CombineOr(m_ExtractLastElement(m_Broadcast(m_VPValue(A))),
+ m_ExtractLastLanePerPart(m_Broadcast(m_VPValue(A)))))) {
Def->replaceAllUsesWith(A);
return;
}
- if (match(Def,
- m_VPInstruction<VPInstruction::ExtractLastElement>(m_VPValue(A))) &&
+ if (match(Def, m_CombineOr(m_ExtractLastElement(m_VPValue(A)),
+ m_ExtractLastLanePerPart(m_VPValue(A)))) &&
((isa<VPInstruction>(A) && vputils::isSingleScalar(A)) ||
(isa<VPReplicateRecipe>(A) &&
cast<VPReplicateRecipe>(A)->isSingleScalar())) &&
@@ -1285,6 +1290,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
[Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {
return Def->replaceAllUsesWith(A);
}
+
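+  // With a single unrolled part (UF == 1), the last lane of the only part is
+  // also the last lane overall, so the per-part extract folds to a plain
+  // ExtractLastElement.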
+ if (Plan->getUF() == 1 &&
+ match(Def, m_ExtractLastLanePerPart(m_VPValue(A)))) {
+ return Def->replaceAllUsesWith(
+ Builder.createNaryOp(VPInstruction::ExtractLastElement, {A}));
+ }
}
void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
@@ -1322,8 +1333,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
Clone->insertBefore(RepOrWidenR);
- auto *Ext = new VPInstruction(VPInstruction::ExtractLastElement,
- {Clone->getOperand(0)});
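+      // If the store's second operand (presumably its address) is uniform
+      // across all VF lanes and UF parts, every lane writes the same location
+      // and only the last value stored matters, so the last element suffices;
+      // otherwise the last lane of each unrolled part is needed.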
+ unsigned ExtractOpc =
+ vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
+ ? VPInstruction::ExtractLastElement
+ : VPInstruction::ExtractLastLanePerPart;
+ auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
Ext->insertBefore(Clone);
Clone->setOperand(0, Ext);
RepR->eraseFromParent();
@@ -1337,7 +1351,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
return U->usesScalars(RepOrWidenR) ||
match(cast<VPRecipeBase>(U),
- m_ExtractLastElement(m_VPValue()));
+ m_CombineOr(m_ExtractLastElement(m_VPValue()),
+ m_ExtractLastLanePerPart(m_VPValue())));
}))
continue;
@@ -1530,7 +1545,7 @@ static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
});
- auto *CanIV = Plan.getCanonicalIV();
+ auto *CanIV = Plan.getVectorLoopRegion()->getCanonicalIV();
if (!match(Cond, m_SpecificICmp(CmpInst::ICMP_EQ,
m_Specific(CanIV->getBackedgeValue()),
m_Specific(&Plan.getVectorTripCount()))))
@@ -2107,9 +2122,18 @@ static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
// Return true if we do not know how to (mechanically) hoist a given recipe
- // out of a loop region. Does not address legality concerns such as aliasing
- // or speculation safety.
+ // out of a loop region.
auto CannotHoistRecipe = [](VPRecipeBase &R) {
+    // Assume intrinsics don't alias anything and don't throw; as long as
+    // they're guaranteed to execute, they're safe to hoist.
+ if (match(&R, m_Intrinsic<Intrinsic::assume>()))
+ return false;
+
+ // TODO: Relax checks in the future, e.g. we could also hoist reads, if
+ // their memory location is not modified in the vector loop.
+ if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
+ return true;
+
// Allocas cannot be hoisted.
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
return RepR && RepR->getOpcode() == Instruction::Alloca;
@@ -2117,17 +2141,18 @@ static void licm(VPlan &Plan) {
// Hoist any loop invariant recipes from the vector loop region to the
  // preheader. Perform a shallow traversal of the vector loop region, to
- // exclude recipes in replicate regions.
+ // exclude recipes in replicate regions. Since the top-level blocks in the
+  // vector loop region are guaranteed to execute if the vector preheader is,
+ // we don't need to check speculation safety.
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ assert(Preheader->getSingleSuccessor() == LoopRegion &&
+         "Expected vector preheader's successor to be the vector loop region");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (CannotHoistRecipe(R))
continue;
- // TODO: Relax checks in the future, e.g. we could also hoist reads, if
- // their memory location is not modified in the vector loop.
- if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
- any_of(R.operands(), [](VPValue *Op) {
+ if (any_of(R.operands(), [](VPValue *Op) {
return !Op->isDefinedOutsideLoopRegions();
}))
continue;
@@ -2319,7 +2344,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
- auto *CanonicalIVPHI = Plan.getCanonicalIV();
+ auto *CanonicalIVPHI = TopRegion->getCanonicalIV();
VPValue *StartV = CanonicalIVPHI->getStartValue();
auto *CanonicalIVIncrement =
@@ -2358,7 +2383,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// Create the active lane mask instruction in the VPlan preheader.
VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
- ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+ ConstantInt::get(TopRegion->getCanonicalIV()->getScalarType(), 1));
auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
@@ -2394,13 +2419,15 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
/// TODO: Introduce explicit recipe for header-mask instead of searching
/// for the header-mask pattern manually.
static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
SmallVector<VPValue *> WideCanonicalIVs;
- auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
- IsaPred<VPWidenCanonicalIVRecipe>);
- assert(count_if(Plan.getCanonicalIV()->users(),
+ auto *FoundWidenCanonicalIVUser = find_if(
+ LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
+ assert(count_if(LoopRegion->getCanonicalIV()->users(),
IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
"Must have at most one VPWideCanonicalIVRecipe");
- if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
+ if (FoundWidenCanonicalIVUser !=
+ LoopRegion->getCanonicalIV()->users().end()) {
auto *WideCanonicalIV =
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
WideCanonicalIVs.push_back(WideCanonicalIV);
@@ -2408,7 +2435,7 @@ static VPSingleDefRecipe *findHeaderMask(VPlan &Plan) {
// Also include VPWidenIntOrFpInductionRecipes that represent a widened
// version of the canonical induction.
- VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
@@ -2441,8 +2468,9 @@ void VPlanTransforms::addActiveLaneMask(
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");
- auto *FoundWidenCanonicalIVUser = find_if(Plan.getCanonicalIV()->users(),
- IsaPred<VPWidenCanonicalIVRecipe>);
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ auto *FoundWidenCanonicalIVUser = find_if(
+ LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
assert(FoundWidenCanonicalIVUser &&
"Must have widened canonical IV when tail folding!");
VPSingleDefRecipe *HeaderMask = findHeaderMask(Plan);
@@ -2455,7 +2483,7 @@ void VPlanTransforms::addActiveLaneMask(
} else {
VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
VPValue *ALMMultiplier = Plan.getOrAddLiveIn(
- ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+ ConstantInt::get(LoopRegion->getCanonicalIV()->getScalarType(), 1));
LaneMask =
B.createNaryOp(VPInstruction::ActiveLaneMask,
{WideCanonicalIV, Plan.getTripCount(), ALMMultiplier},
@@ -2565,9 +2593,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
});
assert(all_of(Plan.getVFxUF().users(),
- [&Plan](VPUser *U) {
- return match(U, m_c_Add(m_Specific(Plan.getCanonicalIV()),
- m_Specific(&Plan.getVFxUF()))) ||
+ [&LoopRegion, &Plan](VPUser *U) {
+ return match(U,
+ m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
+ m_Specific(&Plan.getVFxUF()))) ||
isa<VPWidenPointerInductionRecipe>(U);
}) &&
"Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
@@ -2722,9 +2751,10 @@ void VPlanTransforms::addExplicitVectorLength(
VPlan &Plan, const std::optional<unsigned> &MaxSafeElements) {
if (Plan.hasScalarVFOnly())
return;
- VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
- auto *CanonicalIVPHI = Plan.getCanonicalIV();
+ auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();
auto *CanIVTy = CanonicalIVPHI->getScalarType();
VPValue *StartV = CanonicalIVPHI->getStartValue();
@@ -4164,7 +4194,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
// Adjust induction to reflect that the transformed plan only processes one
// original iteration.
- auto *CanIV = Plan.getCanonicalIV();
+ auto *CanIV = VectorLoop->getCanonicalIV();
auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
VPBuilder PHBuilder(Plan.getVectorPreheader());
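// This patch also swaps the verbose m_VPInstruction<VPInstruction::
// FirstActiveLane>(...) spelling for an m_FirstActiveLane(...) shorthand,
// used above and in VPlanUnroll.cpp below. Going by the other unary matchers
// in VPlanPatternMatch.h, its definition is presumably a thin alias along
// these lines (a sketch, not the verbatim code):
template <typename Op0_t>
inline VPInstruction_match<VPInstruction::FirstActiveLane, Op0_t>
m_FirstActiveLane(const Op0_t &Op0) {
  return m_VPInstruction<VPInstruction::FirstActiveLane>(Op0);
}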
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 1c4adfc..5aeda3e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -69,7 +69,8 @@ class UnrollState {
VPBasicBlock::iterator InsertPtForPhi);
VPValue *getConstantVPV(unsigned Part) {
- Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
+ Type *CanIVIntTy =
+ Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType();
return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
}
@@ -351,8 +352,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// Compute*Result which combine all parts to compute the final value.
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
- match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
- m_VPValue(Op1))) ||
+ match(&R, m_FirstActiveLane(m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 66748c5..8b1b0e5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -53,7 +53,7 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
return Expanded;
}
-bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
+bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
if (isa<VPActiveLaneMaskPHIRecipe>(V))
return true;
@@ -67,12 +67,14 @@ bool vputils::isHeaderMask(const VPValue *V, VPlan &Plan) {
if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
return B == Plan.getTripCount() &&
- (match(A, m_ScalarIVSteps(m_Specific(Plan.getCanonicalIV()), m_One(),
- m_Specific(&Plan.getVF()))) ||
+ (match(A,
+ m_ScalarIVSteps(
+ m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
+ m_One(), m_Specific(&Plan.getVF()))) ||
IsWideCanonicalIV(A));
return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
- B == Plan.getOrCreateBackedgeTakenCount();
+ B == Plan.getBackedgeTakenCount();
}
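// Presumably the switch from getOrCreateBackedgeTakenCount() to
// getBackedgeTakenCount() follows from Plan now being const: the "OrCreate"
// variant may have to materialize a new VPValue, which a const VPlan cannot
// do, so the check compares against an already-created count instead.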
const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
@@ -102,7 +104,8 @@ bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
return all_of(R->operands(), isUniformAcrossVFsAndUFs);
}
- auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV();
+ auto *CanonicalIV =
+ R->getParent()->getEnclosingLoopRegion()->getCanonicalIV();
// Canonical IV chain is uniform.
if (V == CanonicalIV || V == CanonicalIV->getBackedgeValue())
return true;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 0222b0a..cf95ac0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -90,7 +90,7 @@ inline bool isSingleScalar(const VPValue *VPV) {
}
/// Return true if \p V is a header mask in \p Plan.
-bool isHeaderMask(const VPValue *V, VPlan &Plan);
+bool isHeaderMask(const VPValue *V, const VPlan &Plan);
/// Checks if \p V is uniform across all VF lanes and UF parts. It is considered
/// as such if it is either loop invariant (defined outside the vector region)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 5262af6..91734a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -298,11 +298,16 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
return false;
}
}
- if (const auto *EVL = dyn_cast<VPInstruction>(&R)) {
- if (EVL->getOpcode() == VPInstruction::ExplicitVectorLength &&
- !verifyEVLRecipe(*EVL)) {
- errs() << "EVL VPValue is not used correctly\n";
- return false;
+ if (const auto *VPI = dyn_cast<VPInstruction>(&R)) {
+ switch (VPI->getOpcode()) {
+ case VPInstruction::ExplicitVectorLength:
+ if (!verifyEVLRecipe(*VPI)) {
+ errs() << "EVL VPValue is not used correctly\n";
+ return false;
+ }
+ break;
+ default:
+ break;
}
}
}
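// With the if converted to a switch, additional per-opcode verification can
// slot in as further cases; for example (hypothetical, not part of the
// patch):
//   case VPInstruction::ExtractLastLanePerPart:
//     // structural checks for the new opcode would go here
//     break;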