Diffstat (limited to 'llvm/lib/Transforms')
 llvm/lib/Transforms/CFGuard/CFGuard.cpp                        |  25
 llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp         |  22
 llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp       |  39
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp        |   4
 llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp       |  36
 llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp |   4
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp                  |  54
 llvm/lib/Transforms/Scalar/LoopFuse.cpp                        |  34
 llvm/lib/Transforms/Scalar/LoopPassManager.cpp                 |   9
 llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp              |   3
 llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp           |  59
 llvm/lib/Transforms/Scalar/NewGVN.cpp                          |   6
 llvm/lib/Transforms/Scalar/Reassociate.cpp                     |  42
 llvm/lib/Transforms/Scalar/Reg2Mem.cpp                         |   6
 llvm/lib/Transforms/Scalar/SROA.cpp                            |  34
 llvm/lib/Transforms/Scalar/Scalarizer.cpp                      |   8
 llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp              |  40
 llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp            |   7
 llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp      |   4
 llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp        |   4
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp                | 111
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp              |   5
22 files changed, 347 insertions, 209 deletions
diff --git a/llvm/lib/Transforms/CFGuard/CFGuard.cpp b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
index b73a0ce..4645670 100644
--- a/llvm/lib/Transforms/CFGuard/CFGuard.cpp
+++ b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
@@ -147,7 +147,7 @@ public:
private:
// Only add checks if the module has the cfguard=2 flag.
- int cfguard_module_flag = 0;
+ int CFGuardModuleFlag = 0;
StringRef GuardFnName;
Mechanism GuardMechanism = Mechanism::Check;
FunctionType *GuardFnType = nullptr;
@@ -162,9 +162,7 @@ public:
static char ID;
// Default constructor required for the INITIALIZE_PASS macro.
- CFGuard(CFGuardImpl::Mechanism M) : FunctionPass(ID), Impl(M) {
- initializeCFGuardPass(*PassRegistry::getPassRegistry());
- }
+ CFGuard(CFGuardImpl::Mechanism M) : FunctionPass(ID), Impl(M) {}
bool doInitialization(Module &M) override { return Impl.doInitialization(M); }
bool runOnFunction(Function &F) override { return Impl.runOnFunction(F); }
@@ -173,7 +171,6 @@ public:
} // end anonymous namespace
void CFGuardImpl::insertCFGuardCheck(CallBase *CB) {
-
assert(CB->getModule()->getTargetTriple().isOSWindows() &&
"Only applicable for Windows targets");
assert(CB->isIndirectCall() &&
@@ -202,7 +199,6 @@ void CFGuardImpl::insertCFGuardCheck(CallBase *CB) {
}
void CFGuardImpl::insertCFGuardDispatch(CallBase *CB) {
-
assert(CB->getModule()->getTargetTriple().isOSWindows() &&
"Only applicable for Windows targets");
assert(CB->isIndirectCall() &&
@@ -236,14 +232,13 @@ void CFGuardImpl::insertCFGuardDispatch(CallBase *CB) {
}
bool CFGuardImpl::doInitialization(Module &M) {
-
// Check if this module has the cfguard flag and read its value.
if (auto *MD =
mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("cfguard")))
- cfguard_module_flag = MD->getZExtValue();
+ CFGuardModuleFlag = MD->getZExtValue();
// Skip modules for which CFGuard checks have been disabled.
- if (cfguard_module_flag != 2)
+ if (CFGuardModuleFlag != 2)
return false;
// Set up prototypes for the guard check and dispatch functions.
@@ -264,9 +259,8 @@ bool CFGuardImpl::doInitialization(Module &M) {
}
bool CFGuardImpl::runOnFunction(Function &F) {
-
// Skip modules for which CFGuard checks have been disabled.
- if (cfguard_module_flag != 2)
+ if (CFGuardModuleFlag != 2)
return false;
SmallVector<CallBase *, 8> IndirectCalls;
@@ -286,19 +280,16 @@ bool CFGuardImpl::runOnFunction(Function &F) {
}
// If no checks are needed, return early.
- if (IndirectCalls.empty()) {
+ if (IndirectCalls.empty())
return false;
- }
// For each indirect call/invoke, add the appropriate dispatch or check.
if (GuardMechanism == Mechanism::Dispatch) {
- for (CallBase *CB : IndirectCalls) {
+ for (CallBase *CB : IndirectCalls)
insertCFGuardDispatch(CB);
- }
} else {
- for (CallBase *CB : IndirectCalls) {
+ for (CallBase *CB : IndirectCalls)
insertCFGuardCheck(CB);
- }
}
return true;
diff --git a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
index f166fef..cf7e450 100644
--- a/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp
@@ -153,26 +153,23 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
- BranchProbability MinBranchProbability(
- static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
- MinBlockCounterExecution);
-
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
- auto Prob = BranchProbability::getBranchProbability(
- BFI.getBlockFreq(CB->getParent()).getFrequency(),
- BFI.getEntryFreq().getFrequency());
+ auto BlockFreq = BFI.getBlockFreq(CB->getParent()).getFrequency();
+ auto EntryFreq = BFI.getEntryFreq().getFrequency();
+ uint64_t MinFreq =
+ static_cast<uint64_t>(EntryFreq * CoroElideBranchRatio);
- if (Prob < MinBranchProbability) {
+ if (BlockFreq < MinFreq) {
ORE.emit([&]() {
return OptimizationRemarkMissed(
DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' not elided in '"
<< ore::NV("caller", Caller->getName())
- << "' because of low probability: "
- << ore::NV("probability", Prob) << " (threshold: "
- << ore::NV("threshold", MinBranchProbability) << ")";
+ << "' because of low frequency: "
+ << ore::NV("block_freq", BlockFreq)
+ << " (threshold: " << ore::NV("min_freq", MinFreq) << ")";
});
continue;
}
@@ -188,7 +185,8 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' elided in '" << ore::NV("caller", Caller->getName())
- << "' (probability: " << ore::NV("probability", Prob) << ")";
+ << "' (block_freq: " << ore::NV("block_freq", BlockFreq)
+ << ")";
});
FAM.invalidate(*Caller, PreservedAnalyses::none());
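The hunk above replaces the BranchProbability construction with a direct comparison of raw block frequencies. A minimal standalone sketch of the new check, assuming a ratio of 0.05 purely for illustration (the pass reads the real value from the CoroElideBranchRatio option):

#include <cstdint>

// Hypothetical helper mirroring the new threshold test: the callsite is
// reported as "low frequency" and skipped when its block frequency falls
// below a fixed fraction of the entry frequency.
static bool tooColdToElide(uint64_t BlockFreq, uint64_t EntryFreq,
                           double CoroElideBranchRatio = 0.05) {
  uint64_t MinFreq = static_cast<uint64_t>(EntryFreq * CoroElideBranchRatio);
  // e.g. EntryFreq = 1000 -> MinFreq = 50, so BlockFreq = 30 is skipped.
  return BlockFreq < MinFreq;
}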
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 5066a99..894d83f 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -6150,3 +6150,42 @@ void MemProfContextDisambiguation::run(
IndexCallsiteContextGraph CCG(Index, isPrevailing);
CCG.process();
}
+
+// Strips MemProf attributes and metadata. Can be invoked by the pass pipeline
+// when we don't have an index that has recorded that we are linking with
+// allocation libraries containing the necessary APIs for downstream
+// transformations.
+PreservedAnalyses MemProfRemoveInfo::run(Module &M, ModuleAnalysisManager &AM) {
+ // The profile matcher applies hotness attributes directly for allocations,
+ // and those will cause us to generate calls to the hot/cold interfaces
+ // unconditionally. If supports-hot-cold-new was not enabled in the LTO
+ // link then assume we don't want these calls (e.g. not linking with
+ // the appropriate library, or otherwise trying to disable this behavior).
+ bool Changed = false;
+ for (auto &F : M) {
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ auto *CI = dyn_cast<CallBase>(&I);
+ if (!CI)
+ continue;
+ if (CI->hasFnAttr("memprof")) {
+ CI->removeFnAttr("memprof");
+ Changed = true;
+ }
+ if (!CI->hasMetadata(LLVMContext::MD_callsite)) {
+ assert(!CI->hasMetadata(LLVMContext::MD_memprof));
+ continue;
+ }
+ // Strip off all memprof metadata as it is no longer needed.
+ // Importantly, this avoids the addition of new memprof attributes
+ // after inlining propagation.
+ CI->setMetadata(LLVMContext::MD_memprof, nullptr);
+ CI->setMetadata(LLVMContext::MD_callsite, nullptr);
+ Changed = true;
+ }
+ }
+ }
+ if (!Changed)
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 07ad65c..fba1ccf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1481,13 +1481,13 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
return new ICmpInst(Pred, Y, ConstantInt::get(SrcTy, C.logBase2()));
}
- if (Cmp.isEquality() && Trunc->hasOneUse()) {
+ if (Cmp.isEquality() && (Trunc->hasOneUse() || Trunc->hasNoUnsignedWrap())) {
// Canonicalize to a mask and wider compare if the wide type is suitable:
// (trunc X to i8) == C --> (X & 0xff) == (zext C)
if (!SrcTy->isVectorTy() && shouldChangeType(DstBits, SrcBits)) {
Constant *Mask =
ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcBits, DstBits));
- Value *And = Builder.CreateAnd(X, Mask);
+ Value *And = Trunc->hasNoUnsignedWrap() ? X : Builder.CreateAnd(X, Mask);
Constant *WideC = ConstantInt::get(SrcTy, C.zext(SrcBits));
return new ICmpInst(Pred, And, WideC);
}
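To see why the fold is safe to extend to multi-use truncs carrying the nuw flag, here is a plain-integer sketch (not the pass's code): a nuw trunc guarantees the dropped high bits are zero, so the mask becomes redundant and the wide value can be compared directly.

#include <cassert>
#include <cstdint>

// (trunc X to i8) == C  -->  (X & 0xff) == zext(C)
static bool viaMask(uint32_t X, uint8_t C) {
  return (X & 0xffu) == uint32_t(C);
}

// With a nuw trunc the high bits of X are already known to be zero, so the
// mask can be dropped and X compared against zext(C) directly.
static bool viaNoMask(uint32_t X, uint8_t C) {
  assert((X >> 8) == 0 && "nuw trunc: no set bits are discarded");
  return X == uint32_t(C);
}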
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 511bca4..2646334 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -605,17 +605,16 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
return Mapping;
}
-namespace llvm {
-void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
- bool IsKasan, uint64_t *ShadowBase,
- int *MappingScale, bool *OrShadowOffset) {
+void llvm::getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
+ bool IsKasan, uint64_t *ShadowBase,
+ int *MappingScale, bool *OrShadowOffset) {
auto Mapping = getShadowMapping(TargetTriple, LongSize, IsKasan);
*ShadowBase = Mapping.Offset;
*MappingScale = Mapping.Scale;
*OrShadowOffset = Mapping.OrShadowOffset;
}
-void removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) {
+void llvm::removeASanIncompatibleFnAttributes(Function &F, bool ReadsArgMem) {
// Sanitizer checks read from shadow, which invalidates memory(argmem: *).
//
// This is not only true for sanitized functions, because AttrInfer can
@@ -668,8 +667,6 @@ ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel,
AccessSizeIndex(AccessSizeIndex), IsWrite(IsWrite),
CompileKernel(CompileKernel) {}
-} // namespace llvm
-
static uint64_t getRedzoneSizeForScale(int MappingScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
@@ -677,11 +674,10 @@ static uint64_t getRedzoneSizeForScale(int MappingScale) {
}
static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
- if (TargetTriple.isOSEmscripten()) {
+ if (TargetTriple.isOSEmscripten())
return kAsanEmscriptenCtorAndDtorPriority;
- } else {
+ else
return kAsanCtorAndDtorPriority;
- }
}
static Twine genName(StringRef suffix) {
@@ -848,6 +844,7 @@ struct AddressSanitizer {
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
bool maybeInsertDynamicShadowAtFunctionEntry(Function &F);
void markEscapedLocalAllocas(Function &F);
+ void markCatchParametersAsUninteresting(Function &F);
private:
friend struct FunctionStackPoisoner;
@@ -3001,6 +2998,22 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
}
}
}
+// Mitigation for https://github.com/google/sanitizers/issues/749
+// We don't instrument Windows catch-block parameters to avoid
+// interfering with exception handling assumptions.
+void AddressSanitizer::markCatchParametersAsUninteresting(Function &F) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (auto *CatchPad = dyn_cast<CatchPadInst>(&I)) {
+ // Mark the parameters to a catch-block as uninteresting to avoid
+ // instrumenting them.
+ for (Value *Operand : CatchPad->arg_operands())
+ if (auto *AI = dyn_cast<AllocaInst>(Operand))
+ ProcessedAllocas[AI] = false;
+ }
+ }
+ }
+}
bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) {
bool ShouldInstrument =
@@ -3045,6 +3058,9 @@ bool AddressSanitizer::instrumentFunction(Function &F,
// can be passed to that intrinsic.
markEscapedLocalAllocas(F);
+ if (TargetTriple.isOSWindows())
+ markCatchParametersAsUninteresting(F);
+
// We want to instrument every address only once per basic block (unless there
// are calls between uses).
SmallPtrSet<Value *, 16> TempsToInstrument;
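For context, the allocas now skipped on Windows are catch-object parameters such as E in the hypothetical source below; with MSVC-style exception handling they appear as catchpad operands and are written by the EH runtime, which is the situation the linked sanitizers issue is about.

// Hypothetical example (not from the patch): 'E' is lowered to an alloca
// that becomes an operand of the catchpad, which ASan now leaves
// uninstrumented on Windows targets.
struct Error { int Code; };

int handleError() {
  try {
    throw Error{42};
  } catch (Error E) {   // catch-by-value parameter backed by an alloca
    return E.Code;
  }
}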
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 444b390..72e8e50 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -2092,8 +2092,6 @@ bool CHR::run() {
return Changed;
}
-namespace llvm {
-
ControlHeightReductionPass::ControlHeightReductionPass() {
parseCHRFilterFiles();
}
@@ -2116,5 +2114,3 @@ PreservedAnalyses ControlHeightReductionPass::run(
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
-
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c327311..7ebcc21 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -117,6 +118,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
+static cl::opt<bool> LoopPredicationTraps(
+ "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
+ cl::desc("Predicate conditions that trap in loops with only local writes"));
+
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
@@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
return Changed;
}
+static bool crashingBBWithoutEffect(const BasicBlock &BB) {
+ return llvm::all_of(BB, [](const Instruction &I) {
+ // TODO: for now this is overly restrictive, to make sure nothing in this
+ // BB can depend on the loop body.
+ // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
+ // load does not have a side effect, but we could have
+ // %a = load ptr, ptr %ptr
+ // %b = load i32, ptr %a
+ // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
+ // dereference by skipping over loop iterations.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->onlyAccessesInaccessibleMemory())
+ return true;
+ }
+ return isa<UnreachableInst>(I);
+ });
+}
+
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// suggestions on how to improve this? I can obviously bail out for outer
// loops, but that seems less than ideal. MemorySSA can find memory writes,
// is that enough for *all* side effects?
+ bool HasThreadLocalSideEffects = false;
for (BasicBlock *BB : L->blocks())
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
- if (I.mayHaveSideEffects())
- return false;
+ if (I.mayHaveSideEffects()) {
+ if (!LoopPredicationTraps)
+ return false;
+ HasThreadLocalSideEffects = true;
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ // Simple stores cannot be observed by other threads.
+ // If HasThreadLocalSideEffects is set, we check
+ // crashingBBWithoutEffect to make sure that the crashing BB cannot
+ // observe them either.
+ if (!SI->isSimple())
+ return false;
+ } else {
+ return false;
+ }
+ }
bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
@@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (HasThreadLocalSideEffects) {
+ const BasicBlock *Unreachable = nullptr;
+ for (const BasicBlock *Succ : BI->successors()) {
+ if (isa<UnreachableInst>(Succ->getTerminator()))
+ Unreachable = Succ;
+ }
+ // Exit BBs which have one branch back into the loop and another one to
+ // a trap can still be optimized, because local side effects cannot
+ // be observed in the exit case (the trap). We could be smarter about
+ // this, but for now let's pattern match common cases that directly trap.
+ if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
+ return Changed;
+ }
Value *NewCond;
if (ExitCount == ExactBTC) {
NewCond = L->contains(BI->getSuccessor(0)) ?
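The loop shape this change newly admits can be sketched in plain C++ (hypothetical example mirroring the comments above): every side effect is a simple, thread-local store, and the exiting branch's other successor only traps, so predicating the exit condition cannot make those stores observable.

// Hypothetical loop: the only side effects are simple stores, and the
// early exit leads to a block that ends in a trap/unreachable. Skipping
// iterations of the exit test cannot expose 'local' to another thread.
void fillOrTrap(int *local, int n, int bound) {
  for (int i = 0; i < n; ++i) {
    local[i] = i;        // simple store: thread-local side effect
    if (i >= bound)
      __builtin_trap();  // crashing successor without further effects
  }
}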
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 20733032..19eccb9 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -368,7 +368,7 @@ private:
Valid = false;
}
- bool reportInvalidCandidate(llvm::Statistic &Stat) const {
+ bool reportInvalidCandidate(Statistic &Stat) const {
using namespace ore;
assert(L && Preheader && "Fusion candidate not initialized properly!");
#if LLVM_ENABLE_STATS
@@ -445,6 +445,7 @@ struct FusionCandidateCompare {
"No dominance relationship between these fusion candidates!");
}
};
+} // namespace
using LoopVector = SmallVector<Loop *, 4>;
@@ -461,9 +462,15 @@ using LoopVector = SmallVector<Loop *, 4>;
using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
-#if !defined(NDEBUG)
-static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
- const FusionCandidate &FC) {
+#ifndef NDEBUG
+static void printLoopVector(const LoopVector &LV) {
+ dbgs() << "****************************\n";
+ for (const Loop *L : LV)
+ printLoop(*L, dbgs());
+ dbgs() << "****************************\n";
+}
+
+static raw_ostream &operator<<(raw_ostream &OS, const FusionCandidate &FC) {
if (FC.isValid())
OS << FC.Preheader->getName();
else
@@ -472,8 +479,8 @@ static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
return OS;
}
-static llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
- const FusionCandidateSet &CandSet) {
+static raw_ostream &operator<<(raw_ostream &OS,
+ const FusionCandidateSet &CandSet) {
for (const FusionCandidate &FC : CandSet)
OS << FC << '\n';
@@ -489,7 +496,9 @@ printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
dbgs() << "****************************\n";
}
}
-#endif
+#endif // NDEBUG
+
+namespace {
/// Collect all loops in function at the same nest level, starting at the
/// outermost level.
@@ -550,15 +559,6 @@ private:
LoopsOnLevelTy LoopsOnLevel;
};
-#ifndef NDEBUG
-static void printLoopVector(const LoopVector &LV) {
- dbgs() << "****************************\n";
- for (auto *L : LV)
- printLoop(*L, dbgs());
- dbgs() << "****************************\n";
-}
-#endif
-
struct LoopFuser {
private:
// Sets of control flow equivalent fusion candidates for a given nest level.
@@ -1850,7 +1850,7 @@ private:
/// <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
template <typename RemarkKind>
void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
- llvm::Statistic &Stat) {
+ Statistic &Stat) {
assert(FC0.Preheader && FC1.Preheader &&
"Expecting valid fusion candidates");
using namespace ore;
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 32078b1..d827e64 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -8,7 +8,6 @@
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -16,8 +15,6 @@
using namespace llvm;
-namespace llvm {
-
/// Explicitly specialize the pass manager's run method to handle loop nest
/// structure updates.
PreservedAnalyses
@@ -185,7 +182,6 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
}
return PA;
}
-} // namespace llvm
void FunctionToLoopPassAdaptor::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
@@ -193,6 +189,7 @@ void FunctionToLoopPassAdaptor::printPipeline(
Pass->printPipeline(OS, MapClassName2PassName);
OS << ')';
}
+
PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
FunctionAnalysisManager &AM) {
// Before we even compute any loop analyses, first run a miniature function
@@ -219,9 +216,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
// Get the analysis results needed by loop passes.
MemorySSA *MSSA =
UseMemorySSA ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) : nullptr;
- BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
- ? (&AM.getResult<BlockFrequencyAnalysis>(F))
- : nullptr;
LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -229,7 +223,6 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
AM.getResult<ScalarEvolutionAnalysis>(F),
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<TargetIRAnalysis>(F),
- BFI,
MSSA};
// Setup the loop analysis manager from its proxy. It is important that
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 448dc2b..f3e6cbf 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -540,8 +540,6 @@ bool LoopVersioningLICM::run(DominatorTree *DT) {
return Changed;
}
-namespace llvm {
-
PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &LAR,
LPMUpdater &U) {
@@ -556,4 +554,3 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 7cae94eb..3487e81 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -97,6 +97,12 @@ static cl::opt<MatrixLayoutTy> MatrixLayout(
static cl::opt<bool> PrintAfterTransposeOpt("matrix-print-after-transpose-opt",
cl::init(false));
+static cl::opt<unsigned> SplitMatmulRemainderOverThreshold(
+ "matrix-split-matmul-remainder-over-threshold", cl::Hidden,
+ cl::desc("Illegal remainder vectors over this size in bits should be split "
+ "in the inner loop of matmul"),
+ cl::init(0));
+
/// Helper function to either return Scope, if it is a subprogram or the
/// attached subprogram for a local scope.
static DISubprogram *getSubprogram(DIScope *Scope) {
@@ -115,18 +121,16 @@ static bool isSplat(Value *V) {
/// Match any mul operation (fp or integer).
template <typename LTy, typename RTy>
-auto m_AnyMul(const LTy &L, const RTy &R) {
+static auto m_AnyMul(const LTy &L, const RTy &R) {
return m_CombineOr(m_Mul(L, R), m_FMul(L, R));
}
/// Match any add operation (fp or integer).
template <typename LTy, typename RTy>
-auto m_AnyAdd(const LTy &L, const RTy &R) {
+static auto m_AnyAdd(const LTy &L, const RTy &R) {
return m_CombineOr(m_Add(L, R), m_FAdd(L, R));
}
-namespace {
-
// Given an element pointer \p BasePtr to the start of a (sub) matrix, compute
// the start address of vector \p VecIdx with type (\p EltType x \p NumElements)
// assuming \p Stride elements between start two consecutive vectors.
@@ -167,9 +171,9 @@ namespace {
// v_2_0 |v_2_1 |v_2_2 |v_2_3
// v_3_0 {v_3_1 {v_3_2 v_3_3
//
-Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
- unsigned NumElements, Type *EltType,
- IRBuilder<> &Builder) {
+static Value *computeVectorAddr(Value *BasePtr, Value *VecIdx, Value *Stride,
+ unsigned NumElements, Type *EltType,
+ IRBuilder<> &Builder) {
assert((!isa<ConstantInt>(Stride) ||
cast<ConstantInt>(Stride)->getZExtValue() >= NumElements) &&
@@ -338,6 +342,8 @@ computeShapeInfoForInst(Instruction *I,
return std::nullopt;
}
+namespace {
+
/// LowerMatrixIntrinsics contains the methods used to lower matrix intrinsics.
///
/// Currently, the lowering for each matrix intrinsic is done as follows:
@@ -371,7 +377,8 @@ class LowerMatrixIntrinsics {
LoopInfo *LI = nullptr;
OptimizationRemarkEmitter *ORE = nullptr;
- /// Contains estimates of the number of operations (loads, stores, compute) required to lower a matrix operation.
+ /// Contains estimates of the number of operations (loads, stores, compute)
+ /// required to lower a matrix operation.
struct OpInfoTy {
/// Number of stores emitted to generate this matrix.
unsigned NumStores = 0;
@@ -1719,6 +1726,31 @@ public:
ToRemove.push_back(MatMul);
}
+ /// Given \p Remainder iterations of the matmul inner loop, potentially
+ /// lower \p BlockSize that is used for the underlying vector.
+ unsigned capBlockSize(unsigned BlockSize, unsigned Remainder, Type *EltType) {
+ if (BlockSize <= Remainder)
+ return BlockSize;
+
+ // If the remainder is also a legal type just use it.
+ auto *VecTy = FixedVectorType::get(EltType, Remainder);
+ if (TTI.isTypeLegal(VecTy))
+ return Remainder;
+
+ // Similarly, use the remainder if the resulting vector is small enough
+ // that we don't want to split further.
+ if (VecTy->getPrimitiveSizeInBits() <= SplitMatmulRemainderOverThreshold)
+ return Remainder;
+
+ // Gradually lower the vectorization factor to cover the
+ // remainder.
+ do {
+ BlockSize /= 2;
+ } while (BlockSize > Remainder);
+ return BlockSize;
+ }
+
/// Compute \p Result += \p A * \p B for input matrices with left-associating
/// addition.
///
@@ -1756,10 +1788,8 @@ public:
bool isSumZero = isa<ConstantAggregateZero>(Result.getColumn(J));
for (unsigned I = 0; I < R; I += BlockSize) {
- // Gradually lower the vectorization factor to cover the remainder.
- while (I + BlockSize > R)
- BlockSize /= 2;
-
+ // Lower block size to make sure we stay within bounds.
+ BlockSize = capBlockSize(BlockSize, R - I, Result.getElementType());
Value *Sum = IsTiled ? Result.extractVector(I, J, BlockSize, Builder)
: nullptr;
for (unsigned K = 0; K < M; ++K) {
@@ -1784,9 +1814,8 @@ public:
unsigned BlockSize = VF;
bool isSumZero = isa<ConstantAggregateZero>(Result.getRow(I));
for (unsigned J = 0; J < C; J += BlockSize) {
- // Gradually lower the vectorization factor to cover the remainder.
- while (J + BlockSize > C)
- BlockSize /= 2;
+ // Lower the vectorization factor to cover the remainder.
+ BlockSize = capBlockSize(BlockSize, C - J, Result.getElementType());
Value *Sum = nullptr;
for (unsigned K = 0; K < M; ++K) {
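A standalone sketch of the new capping logic, with the legality query and the bit threshold passed as plain parameters (both stand in for TTI.isTypeLegal and the -matrix-split-matmul-remainder-over-threshold option; this is an illustration, not the pass's code):

// Returns the vector width to use for the final 'Remainder' elements.
static unsigned capBlockSizeSketch(unsigned BlockSize, unsigned Remainder,
                                   unsigned EltSizeInBits,
                                   bool RemainderTypeIsLegal,
                                   unsigned ThresholdInBits) {
  if (BlockSize <= Remainder)
    return BlockSize;                  // no capping needed
  if (RemainderTypeIsLegal)
    return Remainder;                  // e.g. <3 x float> is legal: use it
  if (Remainder * EltSizeInBits <= ThresholdInBits)
    return Remainder;                  // small enough, keep it in one piece
  do {                                 // otherwise halve, as before
    BlockSize /= 2;
  } while (BlockSize > Remainder);
  return BlockSize;
}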
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 80aa98d..5a8f18a 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -160,9 +160,6 @@ static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true),
//===----------------------------------------------------------------------===//
// Anchor methods.
-namespace llvm {
-namespace GVNExpression {
-
Expression::~Expression() = default;
BasicExpression::~BasicExpression() = default;
CallExpression::~CallExpression() = default;
@@ -171,9 +168,6 @@ StoreExpression::~StoreExpression() = default;
AggregateValueExpression::~AggregateValueExpression() = default;
PHIExpression::~PHIExpression() = default;
-} // end namespace GVNExpression
-} // end namespace llvm
-
namespace {
// Tarjan's SCC finding algorithm with Nuutila's improvements
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index ba58b8e..6d7ce36 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2623,32 +2623,32 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
namespace {
- class ReassociateLegacyPass : public FunctionPass {
- ReassociatePass Impl;
+class ReassociateLegacyPass : public FunctionPass {
+ ReassociatePass Impl;
- public:
- static char ID; // Pass identification, replacement for typeid
+public:
+ static char ID; // Pass identification, replacement for typeid
- ReassociateLegacyPass() : FunctionPass(ID) {
- initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
- }
+ ReassociateLegacyPass() : FunctionPass(ID) {
+ initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
- FunctionAnalysisManager DummyFAM;
- auto PA = Impl.run(F, DummyFAM);
- return !PA.areAllPreserved();
- }
+ FunctionAnalysisManager DummyFAM;
+ auto PA = Impl.run(F, DummyFAM);
+ return !PA.areAllPreserved();
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
- };
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addPreserved<BasicAAWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
} // end anonymous namespace
diff --git a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
index 30b27cb..7646624 100644
--- a/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/llvm/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -107,9 +107,7 @@ PreservedAnalyses RegToMemPass::run(Function &F, FunctionAnalysisManager &AM) {
return PA;
}
-namespace llvm {
-
-void initializeRegToMemWrapperPassPass(PassRegistry &);
+namespace {
class RegToMemWrapperPass : public FunctionPass {
public:
@@ -136,7 +134,7 @@ public:
return N != 0 || Changed;
}
};
-} // namespace llvm
+} // namespace
INITIALIZE_PASS_BEGIN(RegToMemWrapperPass, "reg2mem", "", true, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index a692009..5c60fad 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -344,6 +344,12 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
uint64_t SliceSizeInBits, Instruction *OldInst,
Instruction *Inst, Value *Dest, Value *Value,
const DataLayout &DL) {
+ // If we want allocas to be migrated using this helper then we need to ensure
+ // that the BaseFragments map code still works. A simple solution would be
+ // to choose to always clone alloca dbg_assigns (rather than sometimes
+ // "stealing" them).
+ assert(!isa<AllocaInst>(Inst) && "Unexpected alloca");
+
auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
// Nothing to do if OldInst has no linked dbg.assign intrinsics.
if (DVRAssignMarkerRange.empty())
@@ -429,11 +435,22 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
}
- ::Value *NewValue = Value ? Value : DbgAssign->getValue();
- DbgVariableRecord *NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
- DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
- Dest, DIExpression::get(Expr->getContext(), {}),
- DbgAssign->getDebugLoc())));
+ DbgVariableRecord *NewAssign;
+ if (IsSplit) {
+ ::Value *NewValue = Value ? Value : DbgAssign->getValue();
+ NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
+ DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
+ Dest, DIExpression::get(Expr->getContext(), {}),
+ DbgAssign->getDebugLoc())));
+ } else {
+ // The store is not split; simply steal the existing dbg_assign.
+ NewAssign = DbgAssign;
+ NewAssign->setAssignId(NewID); // FIXME: Can we avoid generating new IDs?
+ NewAssign->setAddress(Dest);
+ if (Value)
+ NewAssign->replaceVariableLocationOp(0u, Value);
+ assert(Expr == NewAssign->getExpression());
+ }
// If we've updated the value but the original dbg.assign has an arglist
// then kill it now - we can't use the requested new value.
@@ -464,9 +481,10 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
// noted as slightly offset (in code) from the store. In practice this
// should have little effect on the debugging experience due to the fact
// that all the split stores should get the same line number.
- NewAssign->moveBefore(DbgAssign->getIterator());
-
- NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
+ if (NewAssign != DbgAssign) {
+ NewAssign->moveBefore(DbgAssign->getIterator());
+ NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
+ }
LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
};
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index aae5d60..25a531c 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -50,9 +50,7 @@ using namespace llvm;
#define DEBUG_TYPE "scalarizer"
-namespace {
-
-BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
+static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
BasicBlock *BB = Itr->getParent();
if (isa<PHINode>(Itr))
Itr = BB->getFirstInsertionPt();
@@ -76,6 +74,8 @@ using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
+namespace {
+
struct VectorSplit {
// The type of the vector.
FixedVectorType *VecTy = nullptr;
@@ -196,6 +196,7 @@ struct VectorLayout {
// The size of each (non-remainder) fragment in bytes.
uint64_t SplitSize = 0;
};
+} // namespace
static bool isStructOfMatchingFixedVectors(Type *Ty) {
if (!isa<StructType>(Ty))
@@ -268,6 +269,7 @@ static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
return Res;
}
+namespace {
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index e4ba70d..5af6c96 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -3611,8 +3610,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC, AAResults &AA,
TargetTransformInfo &TTI, bool Trivial,
bool NonTrivial, ScalarEvolution *SE,
- MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, LPMUpdater &LoopUpdater) {
+ MemorySSAUpdater *MSSAU, LPMUpdater &LoopUpdater) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@@ -3652,35 +3650,6 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (F->hasOptSize())
return false;
- // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
- // of the loops L is nested in, and of the loops nested in L are all cold.
- auto IsLoopNestCold = [&](const Loop *L) {
- // Check L and all of its parent loops.
- auto *Parent = L;
- while (Parent) {
- if (!PSI->isColdBlock(Parent->getHeader(), BFI))
- return false;
- Parent = Parent->getParentLoop();
- }
- // Next check all loops nested within L.
- SmallVector<const Loop *, 4> Worklist;
- llvm::append_range(Worklist, L->getSubLoops());
- while (!Worklist.empty()) {
- auto *CurLoop = Worklist.pop_back_val();
- if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
- return false;
- llvm::append_range(Worklist, CurLoop->getSubLoops());
- }
- return true;
- };
-
- // Skip cold loops in cold loop nests, as unswitching them brings little
- // benefit but increases the code size
- if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
- LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
- return false;
- }
-
// Perform legality checks.
if (!isSafeForNoNTrivialUnswitching(L, LI))
return false;
@@ -3705,11 +3674,6 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &U) {
Function &F = *L.getHeader()->getParent();
(void)F;
- ProfileSummaryInfo *PSI = nullptr;
- if (auto OuterProxy =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR)
- .getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
- PSI = OuterProxy->getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L
<< "\n");
@@ -3720,7 +3684,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
AR.MSSA->verifyMemorySSA();
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
- &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, U))
+ &AR.SE, MSSAU ? &*MSSAU : nullptr, U))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index ebcbd2b..fa66a03 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -149,8 +149,6 @@ bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
return Impl.runImpl(F, TTI);
}
-namespace llvm {
-
bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) {
LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because "
@@ -328,11 +326,11 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
return true;
}
-FunctionPass *createSpeculativeExecutionPass() {
+FunctionPass *llvm::createSpeculativeExecutionPass() {
return new SpeculativeExecutionLegacyPass();
}
-FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
+FunctionPass *llvm::createSpeculativeExecutionIfHasBranchDivergencePass() {
return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true);
}
@@ -362,4 +360,3 @@ void SpeculativeExecutionPass::printPipeline(
OS << "only-if-divergent-target";
OS << '>';
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 7d01709..e94ad19 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -716,8 +716,6 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
return Ret;
}
-namespace llvm {
-
PreservedAnalyses
StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
const DataLayout *DL = &F.getDataLayout();
@@ -735,5 +733,3 @@ StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<TargetIRAnalysis>();
return PA;
}
-
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 1d83ddc..89d41f3e 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -192,7 +192,7 @@ struct AllocaDerivedValueTracker {
SmallPtrSet<Instruction *, 32> AllocaUsers;
SmallPtrSet<Instruction *, 32> EscapePoints;
};
-}
+} // namespace
static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
if (F.callsFunctionThatReturnsTwice())
@@ -967,7 +967,7 @@ struct TailCallElim : public FunctionPass {
/*BFI=*/nullptr);
}
};
-}
+} // namespace
char TailCallElim::ID = 0;
INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", "Tail Call Elimination",
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 88af2cf..9cd52da 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2242,8 +2242,49 @@ public:
/// may not be necessary.
bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, const int64_t Diff, Value *Ptr0,
- Value *PtrN, StridedPtrInfo &SPtrInfo) const;
+ Align Alignment, const int64_t Diff,
+ const size_t Sz) const;
+
+ /// Return true if an array of scalar loads can be replaced with a strided
+ /// load (with constant stride).
+ ///
+ /// TODO:
+ /// It is possible that the load gets "widened". Suppose that originally each
+ /// load loads `k` bytes and `PointerOps` can be arranged as follows (`%s` is
+ /// constant):
+ /// %b + 0 * %s + 0
+ /// %b + 0 * %s + 1
+ /// %b + 0 * %s + 2
+ /// ...
+ /// %b + 0 * %s + (w - 1)
+ ///
+ /// %b + 1 * %s + 0
+ /// %b + 1 * %s + 1
+ /// %b + 1 * %s + 2
+ /// ...
+ /// %b + 1 * %s + (w - 1)
+ /// ...
+ ///
+ /// %b + (n - 1) * %s + 0
+ /// %b + (n - 1) * %s + 1
+ /// %b + (n - 1) * %s + 2
+ /// ...
+ /// %b + (n - 1) * %s + (w - 1)
+ ///
+ /// In this case we will generate a strided load of type `<n x (k * w)>`.
+ ///
+ /// \param PointerOps list of pointer arguments of loads.
+ /// \param ElemTy original scalar type of loads.
+ /// \param Alignment alignment of the first load.
+ /// \param SortedIndices is the order of PointerOps as returned by
+ /// `sortPtrAccesses`
+ /// \param Diff Pointer difference between the lowest and the highest pointer
+ /// in `PointerOps` as returned by `getPointersDiff`.
+ /// \param Ptr0 first pointer in `PointerOps`.
+ /// \param PtrN last pointer in `PointerOps`.
+ /// \param SPtrInfo If the function returns `true`, it also sets all the fields
+ /// of `SPtrInfo` necessary to generate the strided load later.
+ bool analyzeConstantStrideCandidate(
+ const ArrayRef<Value *> PointerOps, Type *ElemTy, Align Alignment,
+ const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff,
+ Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const;
/// Return true if an array of scalar loads can be replaced with a strided
/// load (with run-time stride).
@@ -6849,9 +6890,8 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
/// current graph (for masked gathers extra extractelement instructions
/// might be required).
bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
- Align Alignment, const int64_t Diff, Value *Ptr0,
- Value *PtrN, StridedPtrInfo &SPtrInfo) const {
- const size_t Sz = PointerOps.size();
+ Align Alignment, const int64_t Diff,
+ const size_t Sz) const {
if (Diff % (Sz - 1) != 0)
return false;
@@ -6875,27 +6915,40 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
return false;
if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
return false;
+ return true;
+ }
+ return false;
+}
- // Iterate through all pointers and check if all distances are
- // unique multiple of Dist.
- SmallSet<int64_t, 4> Dists;
- for (Value *Ptr : PointerOps) {
- int64_t Dist = 0;
- if (Ptr == PtrN)
- Dist = Diff;
- else if (Ptr != Ptr0)
- Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
- // If the strides are not the same or repeated, we can't
- // vectorize.
- if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
- break;
- }
- if (Dists.size() == Sz) {
- Type *StrideTy = DL->getIndexType(Ptr0->getType());
- SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
- SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
- return true;
- }
+bool BoUpSLP::analyzeConstantStrideCandidate(
+ const ArrayRef<Value *> PointerOps, Type *ScalarTy, Align Alignment,
+ const SmallVectorImpl<unsigned> &SortedIndices, const int64_t Diff,
+ Value *Ptr0, Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+ const size_t Sz = PointerOps.size();
+ if (!isStridedLoad(PointerOps, ScalarTy, Alignment, Diff, Sz))
+ return false;
+
+ int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
+
+ // Iterate through all pointers and check if all distances are
+ // unique multiples of Stride.
+ SmallSet<int64_t, 4> Dists;
+ for (Value *Ptr : PointerOps) {
+ int64_t Dist = 0;
+ if (Ptr == PtrN)
+ Dist = Diff;
+ else if (Ptr != Ptr0)
+ Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
+ // If the strides are not the same or repeated, we can't
+ // vectorize.
+ if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
+ break;
+ }
+ if (Dists.size() == Sz) {
+ Type *StrideTy = DL->getIndexType(Ptr0->getType());
+ SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
+ SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
+ return true;
}
return false;
}
@@ -6995,8 +7048,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
Align Alignment =
cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
->getAlign();
- if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
- SPtrInfo))
+ if (analyzeConstantStrideCandidate(PointerOps, ScalarTy, Alignment, Order,
+ *Diff, Ptr0, PtrN, SPtrInfo))
return LoadsState::StridedVectorize;
}
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
@@ -17632,7 +17685,9 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
}
if (IsPHI ||
(!E->isGather() && E->State != TreeEntry::SplitVectorize &&
- E->doesNotNeedToSchedule()) ||
+ (E->doesNotNeedToSchedule() ||
+ (E->hasCopyableElements() && !E->isCopyableElement(LastInst) &&
+ isUsedOutsideBlock(LastInst)))) ||
(GatheredLoadsEntriesFirst.has_value() &&
E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() &&
E->getOpcode() == Instruction::Load)) {
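The distance check that analyzeConstantStrideCandidate performs after the legality query can be modelled without the SLP data structures; the sketch below works on byte offsets from the first pointer (the names and the pre-sorted-input assumption are mine, not the patch's):

#include <cstdint>
#include <set>
#include <vector>

// Offsets are byte distances from Ptr0, sorted, with Offsets.back() == Diff.
// Every offset must be a distinct multiple of Stride = Diff / (Sz - 1).
static bool hasConstantStride(const std::vector<int64_t> &Offsets) {
  const size_t Sz = Offsets.size();
  if (Sz < 2)
    return false;
  const int64_t Diff = Offsets.back();
  if (Diff == 0 || Diff % static_cast<int64_t>(Sz - 1) != 0)
    return false;
  const int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
  std::set<int64_t> Seen;
  for (int64_t D : Offsets)
    if (D % Stride != 0 || !Seen.insert(D).second)
      return false;       // not a multiple of the stride, or repeated
  return true;            // e.g. {0, 8, 16, 24} -> constant stride of 8
}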
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7f5a41c..cae9aee8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2124,6 +2124,11 @@ static void licm(VPlan &Plan) {
// Return true if we do not know how to (mechanically) hoist a given recipe
// out of a loop region.
auto CannotHoistRecipe = [](VPRecipeBase &R) {
+ // Assume intrinsics don't alias anything or throw; as long as they're
+ // guaranteed to execute, they're safe to hoist.
+ if (match(&R, m_Intrinsic<Intrinsic::assume>()))
+ return false;
+
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
// their memory location is not modified in the vector loop.
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())