Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/IPO/FunctionImport.cpp | 4
-rw-r--r--  llvm/lib/Transforms/IPO/FunctionSpecialization.cpp | 11
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 3
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfile.cpp | 3
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp | 4
-rw-r--r--  llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp | 4
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 29
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 29
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 48
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc | 2
-rw-r--r--  llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp | 26
-rw-r--r--  llvm/lib/Transforms/Scalar/LICM.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Utils/FunctionImportUtils.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 117
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 44
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp | 20
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 4
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2
24 files changed, 215 insertions, 181 deletions
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 83aa7de..28ee444 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -72,6 +72,7 @@ STATISTIC(NumImportedModules, "Number of modules imported from");
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
+namespace llvm {
cl::opt<bool>
ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
cl::desc("Import functions with noinline attribute"));
@@ -185,9 +186,8 @@ static cl::opt<bool> CtxprofMoveRootsToOwnModule(
extern cl::list<GlobalValue::GUID> MoveSymbolGUID;
-namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
-}
+} // end namespace llvm
// Load lazily a module from \p FileName in \p Context.
static std::unique_ptr<Module> loadFile(const std::string &FileName,
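Note: several files in this diff apply the same mechanical change as the hunk above: cl::opt definitions move inside namespace llvm, so that extern declarations of the same options in other translation units resolve to the same entities. A minimal sketch of the pattern, using a hypothetical option name that is not part of this change:

#include "llvm/Support/CommandLine.h"

// Defining TU: the definition lives in namespace llvm, matching the
// extern declaration other files use.
namespace llvm {
cl::opt<bool> ExampleFlag("example-flag", cl::init(false), cl::Hidden,
                          cl::desc("Hypothetical option for illustration"));
} // end namespace llvm

// Consuming TU: declare, never define.
namespace llvm {
extern cl::opt<bool> ExampleFlag;
} // end namespace llvm

The static options moved into the same namespace blocks keep internal linkage either way; including them appears to be for consistency of the surrounding block rather than for linkage.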
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 4f53738..150a2dc 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -28,10 +28,13 @@ using namespace llvm;
STATISTIC(NumSpecsCreated, "Number of specializations created");
+namespace llvm {
+
static cl::opt<bool> ForceSpecialization(
- "force-specialization", cl::init(false), cl::Hidden, cl::desc(
- "Force function specialization for every call site with a constant "
- "argument"));
+ "force-specialization", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Force function specialization for every call site with a constant "
+ "argument"));
static cl::opt<unsigned> MaxClones(
"funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
@@ -91,6 +94,8 @@ static cl::opt<bool> SpecializeLiteralConstant(
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB,
BasicBlock *Succ) const {
unsigned I = 0;
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 15f4d76..c4f1b68 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -214,11 +214,12 @@ static cl::opt<bool> MemProfRequireDefinitionForPromotion(
"memprof-require-definition-for-promotion", cl::init(false), cl::Hidden,
cl::desc(
"Require target function definition when promoting indirect calls"));
-} // namespace llvm
extern cl::opt<bool> MemProfReportHintedSizes;
extern cl::opt<unsigned> MinClonedColdBytePercent;
+} // namespace llvm
+
namespace {
/// CRTP base for graphs built from either IR or ThinLTO summary index.
///
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 99b8b88..e39e311 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -116,6 +116,8 @@ STATISTIC(
NumCSInlinedHitGrowthLimit,
"Number of functions with FDO inline stopped due to growth size limit");
+namespace llvm {
+
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
static cl::opt<std::string> SampleProfileFile(
@@ -198,7 +200,6 @@ static cl::opt<bool> DisableSampleLoaderInlining(
"pass, and merge (or scale) profiles (as configured by "
"--sample-profile-merge-inlinee)."));
-namespace llvm {
cl::opt<bool>
SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
cl::desc("Sort profiled recursion by edge weights."));
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 093a39e..70b8614 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -23,6 +23,8 @@ using namespace sampleprof;
#define DEBUG_TYPE "sample-profile-matcher"
+namespace llvm {
+
static cl::opt<unsigned> FuncProfileSimilarityThreshold(
"func-profile-similarity-threshold", cl::Hidden, cl::init(80),
cl::desc("Consider a profile matches a function if the similarity of their "
@@ -55,6 +57,8 @@ static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
cl::desc("The maximum number of callsites in a function, above which stale "
"profile matching will be skipped."));
+} // end namespace llvm
+
void SampleProfileMatcher::findIRAnchors(const Function &F,
AnchorMap &IRAnchors) const {
// For inlined code, recover the original callsite and callee by finding the
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 09bffa7..ac41fdd 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -120,6 +120,8 @@ STATISTIC(NumVirtConstProp1Bit,
"Number of 1 bit virtual constant propagations");
STATISTIC(NumVirtConstProp, "Number of virtual constant propagations");
+namespace llvm {
+
static cl::opt<PassSummaryAction> ClSummaryAction(
"wholeprogramdevirt-summary-action",
cl::desc("What to do with the summary when running this pass"),
@@ -175,6 +177,8 @@ static cl::list<std::string>
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
/// With Clang, a pure virtual class's deleting destructor is emitted as a
/// `llvm.trap` intrinsic followed by an unreachable IR instruction. In the
/// context of whole program devirtualization, the deleting destructor of a pure
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b6b3a95..87000a1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2934,32 +2934,6 @@ static Instruction *foldSelectWithSRem(SelectInst &SI, InstCombinerImpl &IC,
return nullptr;
}
-static Value *foldSelectWithFrozenICmp(SelectInst &Sel, InstCombiner::BuilderTy &Builder) {
- FreezeInst *FI = dyn_cast<FreezeInst>(Sel.getCondition());
- if (!FI)
- return nullptr;
-
- Value *Cond = FI->getOperand(0);
- Value *TrueVal = Sel.getTrueValue(), *FalseVal = Sel.getFalseValue();
-
- // select (freeze(x == y)), x, y --> y
- // select (freeze(x != y)), x, y --> x
- // The freeze should be only used by this select. Otherwise, remaining uses of
- // the freeze can observe a contradictory value.
- // c = freeze(x == y) ; Let's assume that y = poison & x = 42; c is 0 or 1
- // a = select c, x, y ;
- // f(a, c) ; f(poison, 1) cannot happen, but if a is folded
- // ; to y, this can happen.
- CmpPredicate Pred;
- if (FI->hasOneUse() &&
- match(Cond, m_c_ICmp(Pred, m_Specific(TrueVal), m_Specific(FalseVal))) &&
- (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)) {
- return Pred == ICmpInst::ICMP_EQ ? FalseVal : TrueVal;
- }
-
- return nullptr;
-}
-
/// Given that \p CondVal is known to be \p CondIsTrue, try to simplify \p SI.
static Value *simplifyNestedSelectsUsingImpliedCond(SelectInst &SI,
Value *CondVal,
@@ -4446,9 +4420,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Instruction *PN = foldSelectToPhi(SI, DT, Builder))
return replaceInstUsesWith(SI, PN);
- if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder))
- return replaceInstUsesWith(SI, Fr);
-
if (Value *V = foldRoundUpIntegerWithPow2Alignment(SI, Builder))
return replaceInstUsesWith(SI, V);
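For reference, the deleted combine matched a select whose condition is a frozen equality compare of the select's own arms, and its one-use restriction existed because any other user of the frozen value could otherwise observe a value contradicting the folded result, as the removed comment spells out. A hedged IRBuilder reconstruction of the matched shape (helper name hypothetical; the diff does not say whether an equivalent fold now lives elsewhere):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Builds:  %c = freeze(icmp eq %x, %y)
//          %s = select %c, %x, %y
// The removed code folded %s to %y (and the 'ne' form to %x), but only when
// %c had no users other than the select.
static Value *buildFrozenEqSelect(IRBuilder<> &B, Value *X, Value *Y) {
  Value *C = B.CreateFreeze(B.CreateICmpEQ(X, Y), "c");
  return B.CreateSelect(C, X, Y, "s");
}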
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 6ef3066..18a45c6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -319,20 +319,20 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
return nullptr;
}
-/// Find elements of V demanded by UserInstr.
-static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
+/// Find elements of V demanded by UserInstr, accumulating into UnionUsedElts.
+/// Returns false if we were not able to determine all demanded elements.
+static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
+ APInt &UnionUsedElts) {
unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
- // Conservatively assume that all elements are needed.
- APInt UsedElts(APInt::getAllOnes(VWidth));
-
switch (UserInstr->getOpcode()) {
case Instruction::ExtractElement: {
ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
assert(EEI->getVectorOperand() == V);
ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
- UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
+ UnionUsedElts.setBit(EEIIndexC->getZExtValue());
+ return true;
}
break;
}
@@ -341,23 +341,23 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
unsigned MaskNumElts =
cast<FixedVectorType>(UserInstr->getType())->getNumElements();
- UsedElts = APInt(VWidth, 0);
- for (unsigned i = 0; i < MaskNumElts; i++) {
- unsigned MaskVal = Shuffle->getMaskValue(i);
+ for (auto I : llvm::seq(MaskNumElts)) {
+ unsigned MaskVal = Shuffle->getMaskValue(I);
if (MaskVal == -1u || MaskVal >= 2 * VWidth)
continue;
if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
- UsedElts.setBit(MaskVal);
+ UnionUsedElts.setBit(MaskVal);
if (Shuffle->getOperand(1) == V &&
((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
- UsedElts.setBit(MaskVal - VWidth);
+ UnionUsedElts.setBit(MaskVal - VWidth);
}
- break;
+ return true;
}
default:
break;
}
- return UsedElts;
+
+ return false;
}
/// Find union of elements of V demanded by all its users.
@@ -370,7 +370,8 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
APInt UnionUsedElts(VWidth, 0);
for (const Use &U : V->uses()) {
if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
- UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
+ if (!findDemandedEltsBySingleUser(V, I, UnionUsedElts))
+ return APInt::getAllOnes(VWidth);
} else {
UnionUsedElts = APInt::getAllOnes(VWidth);
break;
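Design note on the interface change above: the helper now accumulates into a caller-owned mask and signals failure, instead of materializing a conservative all-ones mask per user, which lets the caller stop at the first user it cannot analyze. A minimal sketch of the caller-side contract, with hypothetical inputs:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
using namespace llvm;

// Accumulate known demanded lanes, or give up and demand everything.
static APInt demandedElts(unsigned VWidth, ArrayRef<unsigned> KnownLanes,
                          bool AllUsersUnderstood) {
  if (!AllUsersUnderstood)            // mirrors the new early bail-out path
    return APInt::getAllOnes(VWidth); // conservative: every lane is demanded
  APInt Union(VWidth, 0);
  for (unsigned Lane : KnownLanes)
    Union.setBit(Lane);               // mirrors UnionUsedElts.setBit(...)
  return Union;
}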
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5d2d79e..917004c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -132,9 +132,11 @@ STATISTIC(NumReassoc , "Number of reassociations");
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
"Controls which instructions are visited");
-static cl::opt<bool>
-EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
- cl::init(true));
+namespace llvm {
+
+static cl::opt<bool> EnableCodeSinking("instcombine-code-sinking",
+ cl::desc("Enable code sinking"),
+ cl::init(true));
static cl::opt<unsigned> MaxSinkNumUsers(
"instcombine-max-sink-users", cl::init(32),
@@ -156,6 +158,8 @@ extern cl::opt<bool> ProfcheckDisableMetadataFixes;
static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
cl::Hidden, cl::init(true));
+} // end namespace llvm
+
std::optional<Instruction *>
InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
// Handle target specific intrinsics
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 0249f21..cf87e35 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -55,11 +55,11 @@ using namespace llvm;
STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
+namespace llvm {
extern cl::opt<unsigned> MaxNumVTableAnnotations;
-namespace llvm {
extern cl::opt<bool> EnableVTableProfileUse;
-}
+} // namespace llvm
// Command line option to disable indirect-call promotion with the default as
// false. This is for debug purpose.
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index cf076b9a..eff6f0c 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1923,20 +1923,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
///
/// Shadow = ParamTLS+ArgOffset.
Value *getShadowPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
- Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
- if (ArgOffset)
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg");
+ return IRB.CreatePtrAdd(MS.ParamTLS,
+ ConstantInt::get(MS.IntptrTy, ArgOffset), "_msarg");
}
/// Compute the origin address for a given function argument.
Value *getOriginPtrForArgument(IRBuilder<> &IRB, int ArgOffset) {
if (!MS.TrackOrigins)
return nullptr;
- Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
- if (ArgOffset)
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg_o");
+ return IRB.CreatePtrAdd(MS.ParamOriginTLS,
+ ConstantInt::get(MS.IntptrTy, ArgOffset),
+ "_msarg_o");
}
/// Compute the shadow address for a retval.
@@ -7219,9 +7216,8 @@ struct VarArgHelperBase : public VarArgHelper {
/// Compute the shadow address for a given va_arg.
Value *getShadowPtrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
- Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, MS.PtrTy, "_msarg_va_s");
+ return IRB.CreatePtrAdd(
+ MS.VAArgTLS, ConstantInt::get(MS.IntptrTy, ArgOffset), "_msarg_va_s");
}
/// Compute the shadow address for a given va_arg.
@@ -7235,12 +7231,12 @@ struct VarArgHelperBase : public VarArgHelper {
/// Compute the origin address for a given va_arg.
Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
- Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
// getOriginPtrForVAArgument() is always called after
// getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
// overflow.
- Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
- return IRB.CreateIntToPtr(Base, MS.PtrTy, "_msarg_va_o");
+ return IRB.CreatePtrAdd(MS.VAArgOriginTLS,
+ ConstantInt::get(MS.IntptrTy, ArgOffset),
+ "_msarg_va_o");
}
void CleanUnusedTLS(IRBuilder<> &IRB, Value *ShadowBase,
@@ -7467,10 +7463,8 @@ struct VarArgAMD64Helper : public VarArgHelperBase {
NextNodeIRBuilder IRB(OrigInst);
Value *VAListTag = OrigInst->getArgOperand(0);
- Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
- IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
- ConstantInt::get(MS.IntptrTy, 16)),
- MS.PtrTy);
+ Value *RegSaveAreaPtrPtr =
+ IRB.CreatePtrAdd(VAListTag, ConstantInt::get(MS.IntptrTy, 16));
Value *RegSaveAreaPtr = IRB.CreateLoad(MS.PtrTy, RegSaveAreaPtrPtr);
Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
const Align Alignment = Align(16);
@@ -7482,10 +7476,8 @@ struct VarArgAMD64Helper : public VarArgHelperBase {
if (MS.TrackOrigins)
IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
Alignment, AMD64FpEndOffset);
- Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
- IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
- ConstantInt::get(MS.IntptrTy, 8)),
- MS.PtrTy);
+ Value *OverflowArgAreaPtrPtr =
+ IRB.CreatePtrAdd(VAListTag, ConstantInt::get(MS.IntptrTy, 8));
Value *OverflowArgAreaPtr =
IRB.CreateLoad(MS.PtrTy, OverflowArgAreaPtrPtr);
Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
@@ -7615,19 +7607,15 @@ struct VarArgAArch64Helper : public VarArgHelperBase {
// Retrieve a va_list field of 'void*' size.
Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
- Value *SaveAreaPtrPtr = IRB.CreateIntToPtr(
- IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
- ConstantInt::get(MS.IntptrTy, offset)),
- MS.PtrTy);
+ Value *SaveAreaPtrPtr =
+ IRB.CreatePtrAdd(VAListTag, ConstantInt::get(MS.IntptrTy, offset));
return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
}
// Retrieve a va_list field of 'int' size.
Value *getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
- Value *SaveAreaPtr = IRB.CreateIntToPtr(
- IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
- ConstantInt::get(MS.IntptrTy, offset)),
- MS.PtrTy);
+ Value *SaveAreaPtr =
+ IRB.CreatePtrAdd(VAListTag, ConstantInt::get(MS.IntptrTy, offset));
Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
}
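All the MemorySanitizer hunks above are the same simplification: a ptrtoint, add, inttoptr round trip becomes a single CreatePtrAdd, which emits an i8 GEP. A before/after sketch under assumed names (TLS stands in for the pass's TLS globals):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Before: offset arithmetic in the integer domain.
static Value *shadowPtrOld(IRBuilder<> &IRB, Value *TLS, Type *IntptrTy,
                           int Offset) {
  Value *Base = IRB.CreatePointerCast(TLS, IntptrTy);
  if (Offset)
    Base = IRB.CreateAdd(Base, ConstantInt::get(IntptrTy, Offset));
  return IRB.CreateIntToPtr(Base, IRB.getPtrTy(0), "_msarg");
}

// After: one pointer-typed addition. With a constant-global base such as the
// TLS globals here, a zero-offset GEP should constant-fold away, so the
// explicit `if (Offset)` guard is no longer needed.
static Value *shadowPtrNew(IRBuilder<> &IRB, Value *TLS, Type *IntptrTy,
                           int Offset) {
  return IRB.CreatePtrAdd(TLS, ConstantInt::get(IntptrTy, Offset), "_msarg");
}

Besides being shorter, this keeps pointer provenance visible to later analyses instead of laundering the address through integers.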
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index d9e850e..120c4f6 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -222,7 +222,6 @@ cl::opt<bool> NoPGOWarnMismatchComdatWeak(
cl::desc("The option is used to turn on/off "
"warnings about hash mismatch for comdat "
"or weak functions."));
-} // namespace llvm
// Command line option to enable/disable select instruction instrumentation.
static cl::opt<bool>
@@ -347,7 +346,6 @@ cl::list<std::string> CtxPGOSkipCallsiteInstrument(
extern cl::opt<unsigned> MaxNumVTableAnnotations;
-namespace llvm {
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
extern cl::opt<PGOViewCountsType> PGOViewCounts;
diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 343bec3..a5f417a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -54,6 +54,8 @@ using namespace llvm;
STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
+namespace llvm {
+
// The minimum call count to optimize memory intrinsic calls.
static cl::opt<unsigned>
MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::init(1000),
@@ -93,6 +95,8 @@ static cl::opt<unsigned>
MemOpMaxOptSize("memop-value-prof-max-opt-size", cl::Hidden, cl::init(128),
cl::desc("Optimize the memop size <= this value"));
+} // end namespace llvm
+
namespace {
static const char *getMIName(const MemIntrinsic *MI) {
diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
index a3d4e53..0534fdd 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -21,7 +21,9 @@
using namespace llvm;
using CandidateInfo = ValueProfileCollector::CandidateInfo;
+namespace llvm {
extern cl::opt<bool> MemOPOptMemcmpBcmp;
+} // end namespace llvm
///--------------------------- MemIntrinsicPlugin ------------------------------
class MemIntrinsicPlugin : public InstVisitor<MemIntrinsicPlugin> {
diff --git a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
index 2025fbb..3c14036e 100644
--- a/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpTableToSwitch.cpp
@@ -26,6 +26,8 @@
using namespace llvm;
+namespace llvm {
+
static cl::opt<unsigned>
JumpTableSizeThreshold("jump-table-to-switch-size-threshold", cl::Hidden,
cl::desc("Only split jump tables with size less or "
@@ -43,6 +45,8 @@ static cl::opt<unsigned> FunctionSizeThreshold(
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
#define DEBUG_TYPE "jump-table-to-switch"
namespace {
@@ -201,14 +205,12 @@ PreservedAnalyses JumpTableToSwitchPass::run(Function &F,
PostDominatorTree *PDT = AM.getCachedResult<PostDominatorTreeAnalysis>(F);
DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
bool Changed = false;
- InstrProfSymtab Symtab;
- if (auto E = Symtab.create(*F.getParent()))
- F.getContext().emitError(
- "Could not create indirect call table, likely corrupted IR" +
- toString(std::move(E)));
- DenseMap<const Function *, GlobalValue::GUID> FToGuid;
- for (const auto &[G, FPtr] : Symtab.getIDToNameMap())
- FToGuid.insert({FPtr, G});
+ auto FuncToGuid = [&](const Function &Fct) {
+ if (Fct.getMetadata(AssignGUIDPass::GUIDMetadataName))
+ return AssignGUIDPass::getGUID(Fct);
+
+    return Function::getGUIDAssumingExternalLinkage(getIRPGOFuncName(Fct, InLTO));
+ };
for (BasicBlock &BB : make_early_inc_range(F)) {
BasicBlock *CurrentBB = &BB;
@@ -230,12 +232,8 @@ PreservedAnalyses JumpTableToSwitchPass::run(Function &F,
std::optional<JumpTableTy> JumpTable = parseJumpTable(GEP, PtrTy);
if (!JumpTable)
continue;
- SplittedOutTail = expandToSwitch(
- Call, *JumpTable, DTU, ORE, [&](const Function &Fct) {
- if (Fct.getMetadata(AssignGUIDPass::GUIDMetadataName))
- return AssignGUIDPass::getGUID(Fct);
- return FToGuid.lookup_or(&Fct, 0U);
- });
+ SplittedOutTail =
+ expandToSwitch(Call, *JumpTable, DTU, ORE, FuncToGuid);
Changed = true;
break;
}
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index bab1f2a..9655173 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -116,6 +116,8 @@ STATISTIC(NumIntAssociationsHoisted,
STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
"reassociated and hoisted out of the loop");
+namespace llvm {
+
/// Memory promotion is enabled by default.
static cl::opt<bool>
DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
@@ -154,7 +156,7 @@ static cl::opt<unsigned> IntAssociationUpperLimit(
// which may not be precise, since optimizeUses is capped. The result is
// correct, but we may not get as "far up" as possible to get which access is
// clobbering the one queried.
-cl::opt<unsigned> llvm::SetLicmMssaOptCap(
+cl::opt<unsigned> SetLicmMssaOptCap(
"licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
"for faster compile. Caps the MemorySSA clobbering calls."));
@@ -162,7 +164,7 @@ cl::opt<unsigned> llvm::SetLicmMssaOptCap(
// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
-cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
+cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap(
"licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
"effect. When MSSA in LICM is enabled, then this is the maximum "
@@ -171,6 +173,8 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
diff --git a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index 1a9e16b..d31154f 100644
--- a/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -17,6 +17,8 @@
using namespace llvm;
+namespace llvm {
+
/// Uses the "source_filename" instead of a Module hash ID for the suffix of
/// promoted locals during LTO. NOTE: This requires that the source filename
/// has a unique name / path to avoid name collisions.
@@ -35,6 +37,8 @@ cl::list<GlobalValue::GUID> MoveSymbolGUID(
"used with the name of contextual profiling roots."),
cl::Hidden);
+} // end namespace llvm
+
FunctionImportGlobalProcessing::FunctionImportGlobalProcessing(
Module &M, const ModuleSummaryIndex &Index,
SetVector<GlobalValue *> *GlobalsToImport, bool ClearDSOLocalOnDeclarations)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 4d1f768..8bba634 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -95,7 +95,9 @@ using namespace PatternMatch;
#define DEBUG_TYPE "simplifycfg"
-cl::opt<bool> llvm::RequireAndPreserveDomTree(
+namespace llvm {
+
+cl::opt<bool> RequireAndPreserveDomTree(
"simplifycfg-require-and-preserve-domtree", cl::Hidden,
cl::desc(
@@ -205,6 +207,8 @@ static cl::opt<unsigned> MaxJumpThreadingLiveBlocks(
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // end namespace llvm
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 12fb46d..7fa787b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5699,6 +5699,20 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
Worklist.push_back(InstOp);
}
+ auto UpdateMemOpUserCost = [this, VF](LoadInst *LI) {
+ // If there are direct memory op users of the newly scalarized load,
+ // their cost may have changed because there's no scalarization
+ // overhead for the operand. Update it.
+ for (User *U : LI->users()) {
+ if (!isa<LoadInst, StoreInst>(U))
+ continue;
+ if (getWideningDecision(cast<Instruction>(U), VF) != CM_Scalarize)
+ continue;
+ setWideningDecision(
+ cast<Instruction>(U), VF, CM_Scalarize,
+ getMemInstScalarizationCost(cast<Instruction>(U), VF));
+ }
+ };
for (auto *I : AddrDefs) {
if (isa<LoadInst>(I)) {
// Setting the desired widening decision should ideally be handled in
@@ -5708,21 +5722,24 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
InstWidening Decision = getWideningDecision(I, VF);
if (Decision == CM_Widen || Decision == CM_Widen_Reverse ||
(!isPredicatedInst(I) && !Legal->isUniformMemOp(*I, VF) &&
- Decision == CM_Scalarize))
+ Decision == CM_Scalarize)) {
// Scalarize a widened load of address or update the cost of a scalar
// load of an address.
setWideningDecision(
I, VF, CM_Scalarize,
(VF.getKnownMinValue() *
getMemoryInstructionCost(I, ElementCount::getFixed(1))));
- else if (const auto *Group = getInterleavedAccessGroup(I)) {
+ UpdateMemOpUserCost(cast<LoadInst>(I));
+ } else if (const auto *Group = getInterleavedAccessGroup(I)) {
// Scalarize an interleave group of address loads.
for (unsigned I = 0; I < Group->getFactor(); ++I) {
- if (Instruction *Member = Group->getMember(I))
+ if (Instruction *Member = Group->getMember(I)) {
setWideningDecision(
Member, VF, CM_Scalarize,
(VF.getKnownMinValue() *
getMemoryInstructionCost(Member, ElementCount::getFixed(1))));
+ UpdateMemOpUserCost(cast<LoadInst>(Member));
+ }
}
}
} else {
@@ -7937,6 +7954,13 @@ bool VPRecipeBuilder::getScaledReductions(
auto CollectExtInfo = [this, &Exts, &ExtOpTypes,
&ExtKinds](SmallVectorImpl<Value *> &Ops) -> bool {
for (const auto &[I, OpI] : enumerate(Ops)) {
+ auto *CI = dyn_cast<ConstantInt>(OpI);
+ if (I > 0 && CI &&
+ canConstantBeExtended(CI, ExtOpTypes[0], ExtKinds[0])) {
+ ExtOpTypes[I] = ExtOpTypes[0];
+ ExtKinds[I] = ExtKinds[0];
+ continue;
+ }
Value *ExtOp;
if (!match(OpI, m_ZExtOrSExt(m_Value(ExtOp))))
return false;
@@ -9521,55 +9545,52 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
- DenseMap<Value *, Value *> ToFrozen;
- SmallVector<Instruction *> InstsToMove;
// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop.
- for (VPRecipeBase &R : Header->phis()) {
- if (auto *IV = dyn_cast<VPCanonicalIVPHIRecipe>(&R)) {
- // When vectorizing the epilogue loop, the canonical induction start
- // value needs to be changed from zero to the value after the main
- // vector loop. Find the resume value created during execution of the main
- // VPlan. It must be the first phi in the loop preheader.
- // FIXME: Improve modeling for canonical IV start values in the epilogue
- // loop.
- using namespace llvm::PatternMatch;
- PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
- for (Value *Inc : EPResumeVal->incoming_values()) {
- if (match(Inc, m_SpecificInt(0)))
- continue;
- assert(!EPI.VectorTripCount &&
- "Must only have a single non-zero incoming value");
- EPI.VectorTripCount = Inc;
- }
- // If we didn't find a non-zero vector trip count, all incoming values
- // must be zero, which also means the vector trip count is zero. Pick the
- // first zero as vector trip count.
- // TODO: We should not choose VF * UF so the main vector loop is known to
- // be dead.
- if (!EPI.VectorTripCount) {
- assert(
- EPResumeVal->getNumIncomingValues() > 0 &&
- all_of(EPResumeVal->incoming_values(),
- [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
- "all incoming values must be 0");
- EPI.VectorTripCount = EPResumeVal->getOperand(0);
- }
- VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
- assert(all_of(IV->users(),
- [](const VPUser *U) {
- return isa<VPScalarIVStepsRecipe>(U) ||
- isa<VPDerivedIVRecipe>(U) ||
- cast<VPRecipeBase>(U)->isScalarCast() ||
- cast<VPInstruction>(U)->getOpcode() ==
- Instruction::Add;
- }) &&
- "the canonical IV should only be used by its increment or "
- "ScalarIVSteps when resetting the start value");
- IV->setOperand(0, VPV);
+ VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+ // When vectorizing the epilogue loop, the canonical induction start
+ // value needs to be changed from zero to the value after the main
+ // vector loop. Find the resume value created during execution of the main
+ // VPlan. It must be the first phi in the loop preheader.
+ // FIXME: Improve modeling for canonical IV start values in the epilogue
+ // loop.
+ using namespace llvm::PatternMatch;
+ PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
+ for (Value *Inc : EPResumeVal->incoming_values()) {
+ if (match(Inc, m_SpecificInt(0)))
continue;
- }
+ assert(!EPI.VectorTripCount &&
+ "Must only have a single non-zero incoming value");
+ EPI.VectorTripCount = Inc;
+ }
+ // If we didn't find a non-zero vector trip count, all incoming values
+ // must be zero, which also means the vector trip count is zero. Pick the
+ // first zero as vector trip count.
+ // TODO: We should not choose VF * UF so the main vector loop is known to
+ // be dead.
+ if (!EPI.VectorTripCount) {
+ assert(EPResumeVal->getNumIncomingValues() > 0 &&
+ all_of(EPResumeVal->incoming_values(),
+ [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
+ "all incoming values must be 0");
+ EPI.VectorTripCount = EPResumeVal->getOperand(0);
+ }
+ VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
+ assert(all_of(IV->users(),
+ [](const VPUser *U) {
+ return isa<VPScalarIVStepsRecipe>(U) ||
+ isa<VPDerivedIVRecipe>(U) ||
+ cast<VPRecipeBase>(U)->isScalarCast() ||
+ cast<VPInstruction>(U)->getOpcode() ==
+ Instruction::Add;
+ }) &&
+ "the canonical IV should only be used by its increment or "
+ "ScalarIVSteps when resetting the start value");
+ IV->setOperand(0, VPV);
+ DenseMap<Value *, Value *> ToFrozen;
+ SmallVector<Instruction *> InstsToMove;
+ for (VPRecipeBase &R : drop_begin(Header->phis())) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
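The CollectExtInfo change earlier in this file lets partial-reduction matching accept a constant operand that has no explicit zext/sext, provided the constant behaves as if extended from the first operand's narrow source type; the test is the canConstantBeExtended round trip added in VPlan.cpp below. A small standalone illustration of that round trip, with hypothetical values:

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Mirrors the canConstantBeExtended logic for a 32-bit constant checked
// against an i8 source type.
static bool extendsFromI8(uint64_t V, bool Signed) {
  APInt Wide(32, V);
  APInt Narrow = Wide.trunc(8);
  return (Signed ? Narrow.sext(32) : Narrow.zext(32)) == Wide;
}
// extendsFromI8(100, /*Signed=*/true)  -> true   (0x64 sext = 100)
// extendsFromI8(200, /*Signed=*/true)  -> false  (0xC8 sext = -56)
// extendsFromI8(200, /*Signed=*/false) -> true   (0xC8 zext = 200)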
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f77d587..fedca65 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2241,10 +2241,9 @@ public:
/// TODO: If load combining is allowed in the IR optimizer, this analysis
/// may not be necessary.
bool isLoadCombineCandidate(ArrayRef<Value *> Stores) const;
- bool isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order, const TargetTransformInfo &TTI,
- const DataLayout &DL, ScalarEvolution &SE,
- const int64_t Diff, StridedPtrInfo &SPtrInfo) const;
+ bool isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align Alignment, const int64_t Diff, Value *Ptr0,
+ Value *PtrN, StridedPtrInfo &SPtrInfo) const;
/// Checks if the given array of loads can be represented as a vectorized,
/// scatter or just simple gather.
@@ -6824,13 +6823,10 @@ isMaskedLoadCompress(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
/// 4. Any pointer operand is an instruction with the users outside of the
/// current graph (for masked gathers extra extractelement instructions
/// might be required).
-bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
- ArrayRef<unsigned> Order,
- const TargetTransformInfo &TTI,
- const DataLayout &DL, ScalarEvolution &SE,
- const int64_t Diff,
- StridedPtrInfo &SPtrInfo) const {
- const size_t Sz = VL.size();
+bool BoUpSLP::isStridedLoad(ArrayRef<Value *> PointerOps, Type *ScalarTy,
+ Align Alignment, const int64_t Diff, Value *Ptr0,
+ Value *PtrN, StridedPtrInfo &SPtrInfo) const {
+ const size_t Sz = PointerOps.size();
if (Diff % (Sz - 1) != 0)
return false;
@@ -6842,7 +6838,6 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
});
const uint64_t AbsoluteDiff = std::abs(Diff);
- Type *ScalarTy = VL.front()->getType();
auto *VecTy = getWidenedType(ScalarTy, Sz);
if (IsAnyPointerUsedOutGraph ||
(AbsoluteDiff > Sz &&
@@ -6853,20 +6848,9 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
if (Diff != Stride * static_cast<int64_t>(Sz - 1))
return false;
- Align Alignment =
- cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
- ->getAlign();
- if (!TTI.isLegalStridedLoadStore(VecTy, Alignment))
+ if (!TTI->isLegalStridedLoadStore(VecTy, Alignment))
return false;
- Value *Ptr0;
- Value *PtrN;
- if (Order.empty()) {
- Ptr0 = PointerOps.front();
- PtrN = PointerOps.back();
- } else {
- Ptr0 = PointerOps[Order.front()];
- PtrN = PointerOps[Order.back()];
- }
+
// Iterate through all pointers and check if all distances are
// unique multiple of Dist.
SmallSet<int64_t, 4> Dists;
@@ -6875,14 +6859,14 @@ bool BoUpSLP::isStridedLoad(ArrayRef<Value *> VL, ArrayRef<Value *> PointerOps,
if (Ptr == PtrN)
Dist = Diff;
else if (Ptr != Ptr0)
- Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE);
+ Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, *DL, *SE);
// If the strides are not the same or repeated, we can't
// vectorize.
if (((Dist / Stride) * Stride) != Dist || !Dists.insert(Dist).second)
break;
}
if (Dists.size() == Sz) {
- Type *StrideTy = DL.getIndexType(Ptr0->getType());
+ Type *StrideTy = DL->getIndexType(Ptr0->getType());
SPtrInfo.StrideVal = ConstantInt::get(StrideTy, Stride);
SPtrInfo.Ty = getWidenedType(ScalarTy, Sz);
return true;
@@ -6971,7 +6955,11 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads(
cast<Instruction>(V), UserIgnoreList);
}))
return LoadsState::CompressVectorize;
- if (isStridedLoad(VL, PointerOps, Order, *TTI, *DL, *SE, *Diff, SPtrInfo))
+ Align Alignment =
+ cast<LoadInst>(Order.empty() ? VL.front() : VL[Order.front()])
+ ->getAlign();
+ if (isStridedLoad(PointerOps, ScalarTy, Alignment, *Diff, Ptr0, PtrN,
+ SPtrInfo))
return LoadsState::StridedVectorize;
}
if (!TTI->isLegalMaskedGather(VecTy, CommonAlignment) ||
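The isStridedLoad refactor above narrows the function to the pointer-math inputs the caller already has. The core arithmetic it keeps: for Sz pointers whose first and last elements lie Diff elements apart, a strided access needs Diff to split into Sz - 1 equal steps. A self-contained sketch of that check, inferred from the code above rather than extracted verbatim:

#include <cstddef>
#include <cstdint>

static bool hasUniformStride(int64_t Diff, size_t Sz) {
  if (Sz < 2 || Diff % static_cast<int64_t>(Sz - 1) != 0)
    return false;
  int64_t Stride = Diff / static_cast<int64_t>(Sz - 1);
  // Each of the Sz - 1 gaps must be exactly Stride elements.
  return Diff == Stride * static_cast<int64_t>(Sz - 1);
}
// e.g. 8 loads spanning Diff = 14 elements -> Stride 2; Diff = 15 -> rejected.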
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index ffd2e59..07b191a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -978,6 +978,16 @@ void VPlan::execute(VPTransformState *State) {
// If the original loop is unreachable, delete it and all its blocks.
if (!ScalarPhVPBB->hasPredecessors()) {
+ // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
+ // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
+ // IR instructions.
+ for (VPIRBasicBlock *EB : getExitBlocks()) {
+ for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
+ if (R.getNumOperands() == 1)
+ R.eraseFromParent();
+ }
+ }
+
Loop *OrigLoop =
State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
auto Blocks = OrigLoop->getBlocksVector();
@@ -1743,6 +1753,16 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
+bool llvm::canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+ TTI::PartialReductionExtendKind ExtKind) {
+ APInt TruncatedVal = CI->getValue().trunc(NarrowType->getScalarSizeInBits());
+ unsigned WideSize = CI->getType()->getScalarSizeInBits();
+ APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
+ ? TruncatedVal.sext(WideSize)
+ : TruncatedVal.zext(WideSize);
+ return ExtendedVal == CI->getValue();
+}
+
TargetTransformInfo::OperandValueInfo
VPCostContext::getOperandInfo(VPValue *V) const {
if (!V->isLiveIn())
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index fe59774..fc1a09e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -468,6 +468,10 @@ public:
};
#endif
+/// Check if a constant \p CI can be safely treated as having been extended
+/// from a narrower type with the given extension kind.
+bool canConstantBeExtended(const ConstantInt *CI, Type *NarrowType,
+ TTI::PartialReductionExtendKind ExtKind);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 46909a5..67b9244 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -340,6 +340,14 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
: Widen->getOperand(1));
ExtAType = GetExtendKind(ExtAR);
ExtBType = GetExtendKind(ExtBR);
+
+ if (!ExtBR && Widen->getOperand(1)->isLiveIn()) {
+ auto *CI = cast<ConstantInt>(Widen->getOperand(1)->getLiveInIRValue());
+ if (canConstantBeExtended(CI, InputTypeA, ExtAType)) {
+ InputTypeB = InputTypeA;
+ ExtBType = ExtAType;
+ }
+ }
};
if (isa<VPWidenCastRecipe>(OpR)) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a73b083..acdb379 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -40,7 +40,7 @@
using namespace llvm;
using namespace VPlanPatternMatch;
-cl::opt<bool> EnableWideActiveLaneMask(
+static cl::opt<bool> EnableWideActiveLaneMask(
"enable-wide-lane-mask", cl::init(false), cl::Hidden,
cl::desc("Enable use of wide get active lane mask instructions"));