aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroCloner.h2
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp6
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp24
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp29
-rw-r--r--llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/GVNSink.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp30
-rw-r--r--llvm/lib/Transforms/Scalar/StructurizeCFG.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp5
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h27
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp18
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp21
-rw-r--r--llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp4
-rw-r--r--llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp2
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h62
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp7
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h61
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp16
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanSLP.h3
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp327
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp16
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanUtils.cpp25
26 files changed, 437 insertions, 269 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroCloner.h b/llvm/lib/Transforms/Coroutines/CoroCloner.h
index e05fe28..1e549f1 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCloner.h
+++ b/llvm/lib/Transforms/Coroutines/CoroCloner.h
@@ -77,7 +77,7 @@ public:
: OrigF(OrigF), Suffix(Suffix), Shape(Shape), FKind(FKind),
Builder(OrigF.getContext()), TTI(TTI) {}
- virtual ~BaseCloner() {}
+ virtual ~BaseCloner() = default;
/// Create a clone for a continuation lowering.
static Function *createClone(Function &OrigF, const Twine &Suffix,
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 5048561..5ed47ae 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -3619,7 +3619,7 @@ struct AAIntraFnReachabilityFunction final
return true;
RQITy StackRQI(A, From, To, ExclusionSet, false);
- typename RQITy::Reachable Result;
+ RQITy::Reachable Result;
if (!NonConstThis->checkQueryCache(A, StackRQI, Result))
return NonConstThis->isReachableImpl(A, StackRQI,
/*IsTemporaryRQI=*/true);
@@ -10701,7 +10701,7 @@ struct AAInterFnReachabilityFunction
auto *NonConstThis = const_cast<AAInterFnReachabilityFunction *>(this);
RQITy StackRQI(A, From, To, ExclusionSet, false);
- typename RQITy::Reachable Result;
+ RQITy::Reachable Result;
if (!NonConstThis->checkQueryCache(A, StackRQI, Result))
return NonConstThis->isReachableImpl(A, StackRQI,
/*IsTemporaryRQI=*/true);
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 894d83f..d35ae47 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -1034,11 +1034,11 @@ private:
} // namespace
template <>
-struct llvm::DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<CallsiteContextGraph<
ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
: public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
template <>
-struct llvm::DenseMapInfo<typename CallsiteContextGraph<
+struct llvm::DenseMapInfo<CallsiteContextGraph<
IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
: public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
template <>
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index d7eb745..2a87a0f 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -208,7 +208,7 @@ namespace KernelInfo {
// };
#define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \
- constexpr const unsigned MEMBER##Idx = IDX;
+ constexpr unsigned MEMBER##Idx = IDX;
KERNEL_ENVIRONMENT_IDX(Configuration, 0)
KERNEL_ENVIRONMENT_IDX(Ident, 1)
@@ -216,7 +216,7 @@ KERNEL_ENVIRONMENT_IDX(Ident, 1)
#undef KERNEL_ENVIRONMENT_IDX
#define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \
- constexpr const unsigned MEMBER##Idx = IDX;
+ constexpr unsigned MEMBER##Idx = IDX;
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1)
@@ -258,7 +258,7 @@ KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)
GlobalVariable *
getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) {
- constexpr const int InitKernelEnvironmentArgNo = 0;
+ constexpr int InitKernelEnvironmentArgNo = 0;
return cast<GlobalVariable>(
KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)
->stripPointerCasts());
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3ddf182..cbaff29 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3997,6 +3997,27 @@ static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,
return nullptr;
}
+/// Fold select(X >s 0, 0, -X) | smax(X, 0) --> abs(X)
+/// select(X <s 0, -X, 0) | smax(X, 0) --> abs(X)
+static Value *FoldOrOfSelectSmaxToAbs(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *X;
+ Value *Sel;
+ if (match(&I,
+ m_c_Or(m_Value(Sel), m_OneUse(m_SMax(m_Value(X), m_ZeroInt()))))) {
+ auto NegX = m_Neg(m_Specific(X));
+ if (match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(X),
+ m_ZeroInt()),
+ m_ZeroInt(), NegX)) ||
+ match(Sel, m_Select(m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(X),
+ m_ZeroInt()),
+ NegX, m_ZeroInt())))
+ return Builder.CreateBinaryIntrinsic(Intrinsic::abs, X,
+ Builder.getFalse());
+ }
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -4545,6 +4566,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyAddWithRemainder(I))
return replaceInstUsesWith(I, V);
+ if (Value *Res = FoldOrOfSelectSmaxToAbs(I, Builder))
+ return replaceInstUsesWith(I, Res);
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index f5130da..9572f9d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3599,6 +3599,21 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
m_Not(m_Specific(SelCond->getTrueValue())));
if (MayNeedFreeze)
C = Builder.CreateFreeze(C);
+ if (!ProfcheckDisableMetadataFixes) {
+ Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr;
+ if (match(CondVal, m_LogicalAnd(m_Specific(C), m_Value(A2))) &&
+ SelCond) {
+ return SelectInst::Create(C, A, B, "", nullptr, SelCond);
+ } else if (match(FalseVal,
+ m_LogicalAnd(m_Not(m_Value(C2)), m_Value(B2))) &&
+ SelFVal) {
+ SelectInst *NewSI = SelectInst::Create(C, A, B, "", nullptr, SelFVal);
+ NewSI->swapProfMetadata();
+ return NewSI;
+ } else {
+ return createSelectInstWithUnknownProfile(C, A, B);
+ }
+ }
return SelectInst::Create(C, A, B);
}
@@ -3615,6 +3630,20 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
m_Not(m_Specific(SelFVal->getTrueValue())));
if (MayNeedFreeze)
C = Builder.CreateFreeze(C);
+ if (!ProfcheckDisableMetadataFixes) {
+ Value *C2 = nullptr, *A2 = nullptr, *B2 = nullptr;
+ if (match(CondVal, m_LogicalAnd(m_Not(m_Value(C2)), m_Value(A2))) &&
+ SelCond) {
+ SelectInst *NewSI = SelectInst::Create(C, B, A, "", nullptr, SelCond);
+ NewSI->swapProfMetadata();
+ return NewSI;
+ } else if (match(FalseVal, m_LogicalAnd(m_Specific(C), m_Value(B2))) &&
+ SelFVal) {
+ return SelectInst::Create(C, B, A, "", nullptr, SelFVal);
+ } else {
+ return createSelectInstWithUnknownProfile(C, B, A);
+ }
+ }
return SelectInst::Create(C, B, A);
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
index 80e77e09..a2fad02 100644
--- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -161,7 +161,7 @@ template <char NsanTypeId>
class ShadowTypeConfigImpl : public ShadowTypeConfig {
public:
char getNsanTypeId() const override { return NsanTypeId; }
- static constexpr const char kNsanTypeId = NsanTypeId;
+ static constexpr char kNsanTypeId = NsanTypeId;
};
// `double` (`d`) shadow type.
diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
index 89980d5..a577f51 100644
--- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
+++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp
@@ -122,7 +122,8 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) {
Value *Cond = Assume->getArgOperand(0);
// Don't drop type tests, which have special semantics.
- if (match(Cond, m_Intrinsic<Intrinsic::type_test>()))
+ if (match(Cond, m_Intrinsic<Intrinsic::type_test>()) ||
+ match(Cond, m_Intrinsic<Intrinsic::public_type_test>()))
continue;
SmallVector<Value *> Affected;
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index a06f832..d564e32 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -514,7 +514,7 @@ public:
class GVNSink {
public:
- GVNSink() {}
+ GVNSink() = default;
bool run(Function &F) {
LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 3487e81..7e70ba2 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -245,11 +245,14 @@ raw_ostream &operator<<(raw_ostream &OS, ShapeInfo SI) {
} // namespace
-static bool isUniformShape(Value *V) {
+static bool isShapePreserving(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return true;
+ if (isa<SelectInst>(I))
+ return true;
+
if (I->isBinaryOp())
return true;
@@ -300,6 +303,16 @@ static bool isUniformShape(Value *V) {
}
}
+/// Return an iterator over the operands of \p I that should share shape
+/// information with \p I.
+static iterator_range<Use *> getShapedOperandsForInst(Instruction *I) {
+ assert(isShapePreserving(I) &&
+ "Can't retrieve shaped operands for an instruction that does not "
+ "preserve shape information");
+ auto Ops = I->operands();
+ return isa<SelectInst>(I) ? drop_begin(Ops) : Ops;
+}
+
/// Return the ShapeInfo for the result of \p I, if it can be determined.
static std::optional<ShapeInfo>
computeShapeInfoForInst(Instruction *I,
@@ -329,9 +342,8 @@ computeShapeInfoForInst(Instruction *I,
return OpShape->second;
}
- if (isUniformShape(I) || isa<SelectInst>(I)) {
- auto Ops = I->operands();
- auto ShapedOps = isa<SelectInst>(I) ? drop_begin(Ops) : Ops;
+ if (isShapePreserving(I)) {
+ auto ShapedOps = getShapedOperandsForInst(I);
// Find the first operand that has a known shape and use that.
for (auto &Op : ShapedOps) {
auto OpShape = ShapeMap.find(Op.get());
@@ -710,10 +722,9 @@ public:
case Intrinsic::matrix_column_major_store:
return true;
default:
- return isUniformShape(II);
+ break;
}
- return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V) ||
- isa<SelectInst>(V);
+ return isShapePreserving(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
}
/// Propagate the shape information of instructions to their users.
@@ -800,9 +811,8 @@ public:
} else if (isa<StoreInst>(V)) {
// Nothing to do. We forward-propagated to this so we would just
// backward propagate to an instruction with an already known shape.
- } else if (isUniformShape(V) || isa<SelectInst>(V)) {
- auto Ops = cast<Instruction>(V)->operands();
- auto ShapedOps = isa<SelectInst>(V) ? drop_begin(Ops) : Ops;
+ } else if (isShapePreserving(V)) {
+ auto ShapedOps = getShapedOperandsForInst(cast<Instruction>(V));
// Propagate to all operands.
ShapeInfo Shape = ShapeMap[V];
for (Use &U : ShapedOps) {
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 0f3978f..5f6f66a 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -143,8 +143,8 @@ struct SubGraphTraits {
class WrappedSuccIterator
: public iterator_adaptor_base<
WrappedSuccIterator, BaseSuccIterator,
- typename std::iterator_traits<BaseSuccIterator>::iterator_category,
- NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
+ std::iterator_traits<BaseSuccIterator>::iterator_category, NodeRef,
+ std::ptrdiff_t, NodeRef *, NodeRef> {
SmallDenseSet<RegionNode *> *Nodes;
public:
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 8be471b..6e60b94 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -992,9 +992,12 @@ BranchProbability llvm::getBranchProbability(BranchInst *B,
uint64_t Weight0, Weight1;
if (!extractBranchWeights(*B, Weight0, Weight1))
return BranchProbability::getUnknown();
+ uint64_t Denominator = Weight0 + Weight1;
+ if (Denominator == 0)
+ return BranchProbability::getUnknown();
if (!ForFirstTarget)
std::swap(Weight0, Weight1);
- return BranchProbability::getBranchProbability(Weight0, Weight0 + Weight1);
+ return BranchProbability::getBranchProbability(Weight0, Denominator);
}
bool llvm::setBranchProbability(BranchInst *B, BranchProbability P,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 3fed003..04b0562 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -167,7 +167,7 @@ public:
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
return tryInsertInstruction(
- new VPInstruction(Opcode, Operands, Flags, DL, Name));
+ new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
}
VPInstruction *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
@@ -184,7 +184,7 @@ public:
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
return tryInsertInstruction(
- new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
+ new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
}
VPInstruction *createNot(VPValue *Operand,
@@ -205,7 +205,7 @@ public:
return tryInsertInstruction(new VPInstruction(
Instruction::BinaryOps::Or, {LHS, RHS},
- VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
+ VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
}
VPInstruction *createLogicalAnd(VPValue *LHS, VPValue *RHS,
@@ -221,7 +221,7 @@ public:
std::optional<FastMathFlags> FMFs = std::nullopt) {
auto *Select =
FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
- *FMFs, DL, Name)
+ *FMFs, {}, DL, Name)
: new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
DL, Name);
return tryInsertInstruction(Select);
@@ -235,7 +235,7 @@ public:
assert(Pred >= CmpInst::FIRST_ICMP_PREDICATE &&
Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
return tryInsertInstruction(
- new VPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name));
+ new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
}
/// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
@@ -246,7 +246,7 @@ public:
assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
return tryInsertInstruction(
- new VPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name));
+ new VPInstruction(Instruction::FCmp, {A, B}, Pred, {}, DL, Name));
}
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
@@ -254,7 +254,7 @@ public:
const Twine &Name = "") {
return tryInsertInstruction(
new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
- GEPNoWrapFlags::none(), DL, Name));
+ GEPNoWrapFlags::none(), {}, DL, Name));
}
VPInstruction *createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset,
@@ -262,7 +262,7 @@ public:
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
return tryInsertInstruction(new VPInstruction(
- VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, DL, Name));
+ VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
}
VPInstruction *createWidePtrAdd(VPValue *Ptr, VPValue *Offset,
@@ -270,7 +270,7 @@ public:
const Twine &Name = "") {
return tryInsertInstruction(
new VPInstruction(VPInstruction::WidePtrAdd, {Ptr, Offset},
- GEPNoWrapFlags::none(), DL, Name));
+ GEPNoWrapFlags::none(), {}, DL, Name));
}
VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues, DebugLoc DL,
@@ -280,8 +280,7 @@ public:
VPValue *createElementCount(Type *Ty, ElementCount EC) {
VPlan &Plan = *getInsertBlock()->getPlan();
- VPValue *RuntimeEC =
- Plan.getOrAddLiveIn(ConstantInt::get(Ty, EC.getKnownMinValue()));
+ VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
if (EC.isScalable()) {
VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
RuntimeEC = EC.getKnownMinValue() == 1
@@ -304,9 +303,11 @@ public:
}
VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
- Type *ResultTy, DebugLoc DL) {
+ Type *ResultTy, DebugLoc DL,
+ const VPIRFlags &Flags = {},
+ const VPIRMetadata &Metadata = {}) {
return tryInsertInstruction(
- new VPInstructionWithType(Opcode, Op, ResultTy, {}, DL));
+ new VPInstructionWithType(Opcode, Op, ResultTy, DL, Flags, Metadata));
}
VPValue *createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 25bf49d..e5c3f17 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7752,8 +7752,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
if (CM.isPredicatedInst(I)) {
SmallVector<VPValue *> Ops(Operands);
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
- VPValue *One =
- Plan.getOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
+ VPValue *One = Plan.getConstantInt(I->getType(), 1u);
auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());
Ops[1] = SafeRHS;
return new VPWidenRecipe(*I, Ops);
@@ -7806,11 +7805,10 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
}
case Instruction::ExtractValue: {
SmallVector<VPValue *> NewOps(Operands);
- Type *I32Ty = IntegerType::getInt32Ty(I->getContext());
auto *EVI = cast<ExtractValueInst>(I);
assert(EVI->getNumIndices() == 1 && "Expected one extractvalue index");
unsigned Idx = EVI->getIndices()[0];
- NewOps.push_back(Plan.getOrAddLiveIn(ConstantInt::get(I32Ty, Idx, false)));
+ NewOps.push_back(Plan.getConstantInt(32, Idx));
return new VPWidenRecipe(*I, NewOps);
}
};
@@ -8179,8 +8177,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
"Expected an ADD or SUB operation for predicated partial "
"reductions (because the neutral element in the mask is zero)!");
Cond = getBlockInMask(Builder.getInsertBlock());
- VPValue *Zero =
- Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
+ VPValue *Zero = Plan.getConstantInt(Reduction->getType(), 0);
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
}
return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
@@ -8643,7 +8640,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
} else if (PhiR->isInLoop() && Kind == RecurKind::AddChainWithSubs &&
CurrentLinkI->getOpcode() == Instruction::Sub) {
Type *PhiTy = PhiR->getUnderlyingValue()->getType();
- auto *Zero = Plan->getOrAddLiveIn(ConstantInt::get(PhiTy, 0));
+ auto *Zero = Plan->getConstantInt(PhiTy, 0);
VPWidenRecipe *Sub = new VPWidenRecipe(
Instruction::Sub, {Zero, CurrentLink->getOperand(1)}, {},
VPIRMetadata(), CurrentLinkI->getDebugLoc());
@@ -8857,8 +8854,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
ToDelete.push_back(Select);
// Convert the reduction phi to operate on bools.
- PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
- OrigLoop->getHeader()->getContext())));
+ PhiR->setOperand(0, Plan->getFalse());
continue;
}
@@ -8880,9 +8876,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
unsigned ScaleFactor =
RecipeBuilder.getScalingForReduction(RdxDesc.getLoopExitInstr())
.value_or(1);
- Type *I32Ty = IntegerType::getInt32Ty(PhiTy->getContext());
- auto *ScaleFactorVPV =
- Plan->getOrAddLiveIn(ConstantInt::get(I32Ty, ScaleFactor));
+ auto *ScaleFactorVPV = Plan->getConstantInt(32, ScaleFactor);
VPValue *StartV = PHBuilder.createNaryOp(
VPInstruction::ReductionStartVector,
{PhiR->getStartValue(), Iden, ScaleFactorVPV},
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 34b405c..bf3f52c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20975,6 +20975,27 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
if (isa<PHINode>(S.getMainOp()) ||
isVectorLikeInstWithConstOps(S.getMainOp()))
return nullptr;
+ // If the parent node is non-schedulable and the current node is copyable, and
+ // any of parent instructions are used outside several basic blocks or in
+ // bin-op node - cancel scheduling, it may cause wrong def-use deps in
+ // analysis, leading to a crash.
+ // Non-scheduled nodes may not have related ScheduleData model, which may lead
+ // to a skipped dep analysis.
+ if (S.areInstructionsWithCopyableElements() && EI && EI.UserTE->hasState() &&
+ EI.UserTE->doesNotNeedToSchedule() &&
+ EI.UserTE->getOpcode() != Instruction::PHI &&
+ any_of(EI.UserTE->Scalars, [](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || I->hasOneUser())
+ return false;
+ for (User *U : I->users()) {
+ auto *UI = cast<Instruction>(U);
+ if (isa<BinaryOperator>(UI))
+ return true;
+ }
+ return false;
+ }))
+ return std::nullopt;
bool HasCopyables = S.areInstructionsWithCopyableElements();
if (((!HasCopyables && doesNotNeedToSchedule(VL)) ||
all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))) {
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
index 9c869dd..d354933 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp
@@ -92,7 +92,7 @@ void MemDGNode::print(raw_ostream &OS, bool PrintDeps) const {
DGNode::print(OS, false);
if (PrintDeps) {
// Print memory preds.
- static constexpr const unsigned Indent = 4;
+ static constexpr unsigned Indent = 4;
for (auto *Pred : MemPreds)
OS.indent(Indent) << "<-" << *Pred->getInstruction() << "\n";
}
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index 86dbd21..5534da9 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -25,14 +25,14 @@ static cl::opt<bool>
"emit new instructions (*very* expensive)."));
#endif // NDEBUG
-static constexpr const unsigned long StopAtDisabled =
+static constexpr unsigned long StopAtDisabled =
std::numeric_limits<unsigned long>::max();
static cl::opt<unsigned long>
StopAt("sbvec-stop-at", cl::init(StopAtDisabled), cl::Hidden,
cl::desc("Vectorize if the invocation count is < than this. 0 "
"disables vectorization."));
-static constexpr const unsigned long StopBundleDisabled =
+static constexpr unsigned long StopBundleDisabled =
std::numeric_limits<unsigned long>::max();
static cl::opt<unsigned long>
StopBundle("sbvec-stop-bndl", cl::init(StopBundleDisabled), cl::Hidden,
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
index ed2f80b..2de6921 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
@@ -43,7 +43,7 @@ cl::opt<std::string> AllowFiles(
"sbvec-allow-files", cl::init(".*"), cl::Hidden,
cl::desc("Run the vectorizer only on file paths that match any in the "
"list of comma-separated regex's."));
-static constexpr const char AllowFilesDelim = ',';
+static constexpr char AllowFilesDelim = ',';
SandboxVectorizerPass::SandboxVectorizerPass() : FPM("fpm") {
if (UserDefinedPassPipeline == DefaultPipelineMagicStr) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 1504acf..cfe1f1e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -939,7 +939,7 @@ class VPIRMetadata {
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
public:
- VPIRMetadata() {}
+ VPIRMetadata() = default;
/// Adds metatadata that can be preserved from the original instruction
/// \p I.
@@ -950,12 +950,9 @@ public:
VPIRMetadata(Instruction &I, LoopVersioning *LVer);
/// Copy constructor for cloning.
- VPIRMetadata(const VPIRMetadata &Other) : Metadata(Other.Metadata) {}
+ VPIRMetadata(const VPIRMetadata &Other) = default;
- VPIRMetadata &operator=(const VPIRMetadata &Other) {
- Metadata = Other.Metadata;
- return *this;
- }
+ VPIRMetadata &operator=(const VPIRMetadata &Other) = default;
/// Add all metadata to \p I.
void applyMetadata(Instruction &I) const;
@@ -1107,14 +1104,14 @@ public:
VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
- const VPIRFlags &Flags, DebugLoc DL = DebugLoc::getUnknown(),
- const Twine &Name = "");
+ const VPIRFlags &Flags, const VPIRMetadata &MD = {},
+ DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
VPInstruction *clone() override {
- SmallVector<VPValue *, 2> Operands(operands());
- auto *New = new VPInstruction(Opcode, Operands, *this, getDebugLoc(), Name);
+ auto *New = new VPInstruction(Opcode, operands(), *this, *this,
+ getDebugLoc(), Name);
if (getUnderlyingValue())
New->setUnderlyingValue(getUnderlyingInstr());
return New;
@@ -1196,7 +1193,14 @@ public:
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL,
const Twine &Name = "")
- : VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {}
+ : VPInstruction(Opcode, Operands, Flags, {}, DL, Name),
+ ResultTy(ResultTy) {}
+
+ VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
+ Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags,
+ const VPIRMetadata &Metadata, const Twine &Name = "")
+ : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
+ ResultTy(ResultTy) {}
static inline bool classof(const VPRecipeBase *R) {
// VPInstructionWithType are VPInstructions with specific opcodes requiring
@@ -1221,10 +1225,9 @@ public:
}
VPInstruction *clone() override {
- SmallVector<VPValue *, 2> Operands(operands());
auto *New =
- new VPInstructionWithType(getOpcode(), Operands, getResultType(), *this,
- getDebugLoc(), getName());
+ new VPInstructionWithType(getOpcode(), operands(), getResultType(),
+ *this, getDebugLoc(), getName());
New->setUnderlyingValue(getUnderlyingValue());
return New;
}
@@ -3206,6 +3209,9 @@ protected:
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
Alignment(Alignment), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+    assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
+           "Reversed access without VPVectorEndPointerRecipe address?");
}
public:
@@ -3977,7 +3983,7 @@ class VPIRBasicBlock : public VPBasicBlock {
IRBB(IRBB) {}
public:
- ~VPIRBasicBlock() override {}
+ ~VPIRBasicBlock() override = default;
static inline bool classof(const VPBlockBase *V) {
return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
@@ -4029,7 +4035,7 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
IsReplicator(IsReplicator) {}
public:
- ~VPRegionBlock() override {}
+ ~VPRegionBlock() override = default;
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPBlockBase *V) {
@@ -4393,15 +4399,25 @@ public:
}
/// Return a VPValue wrapping i1 true.
- VPValue *getTrue() {
- LLVMContext &Ctx = getContext();
- return getOrAddLiveIn(ConstantInt::getTrue(Ctx));
- }
+ VPValue *getTrue() { return getConstantInt(1, 1); }
/// Return a VPValue wrapping i1 false.
- VPValue *getFalse() {
- LLVMContext &Ctx = getContext();
- return getOrAddLiveIn(ConstantInt::getFalse(Ctx));
+ VPValue *getFalse() { return getConstantInt(1, 0); }
+
+ /// Return a VPValue wrapping a ConstantInt with the given type and value.
+ VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
+ return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
+ }
+
+ /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
+ VPValue *getConstantInt(unsigned BitWidth, uint64_t Val,
+ bool IsSigned = false) {
+ return getConstantInt(APInt(BitWidth, Val, IsSigned));
+ }
+
+ /// Return a VPValue wrapping a ConstantInt with the given APInt value.
+ VPValue *getConstantInt(const APInt &Val) {
+ return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
}
/// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 65688a3..1a66d20 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -612,8 +612,7 @@ void VPlanTransforms::addMiddleCheck(VPlan &Plan,
if (!RequiresScalarEpilogueCheck)
Cmp = Plan.getFalse();
else if (TailFolded)
- Cmp = Plan.getOrAddLiveIn(
- ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
+ Cmp = Plan.getTrue();
else
Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
&Plan.getVectorTripCount(), LatchDL, "cmp.n");
@@ -712,8 +711,8 @@ void VPlanTransforms::addMinimumIterationCheck(
// additional overflow check is required before entering the vector loop.
// Get the maximum unsigned value for the type.
- VPValue *MaxUIntTripCount = Plan.getOrAddLiveIn(ConstantInt::get(
- TripCountTy, cast<IntegerType>(TripCountTy)->getMask()));
+ VPValue *MaxUIntTripCount =
+ Plan.getConstantInt(cast<IntegerType>(TripCountTy)->getMask());
VPValue *DistanceToMax = Builder.createNaryOp(
Instruction::Sub, {MaxUIntTripCount, TripCountVPV},
DebugLoc::getUnknown());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index b5b98c6..b57c448 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -313,7 +313,8 @@ private:
// Check for recipes that do not have opcodes.
if constexpr (std::is_same_v<RecipeTy, VPScalarIVStepsRecipe> ||
std::is_same_v<RecipeTy, VPCanonicalIVPHIRecipe> ||
- std::is_same_v<RecipeTy, VPDerivedIVRecipe>)
+ std::is_same_v<RecipeTy, VPDerivedIVRecipe> ||
+ std::is_same_v<RecipeTy, VPVectorEndPointerRecipe>)
return DefR;
else
return DefR && DefR->getOpcode() == Opcode;
@@ -686,6 +687,64 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
}
+template <typename Addr_t, typename Mask_t> struct Load_match {
+ Addr_t Addr;
+ Mask_t Mask;
+
+ Load_match(Addr_t Addr, Mask_t Mask) : Addr(Addr), Mask(Mask) {}
+
+ template <typename OpTy> bool match(const OpTy *V) const {
+ auto *Load = dyn_cast<VPWidenLoadRecipe>(V);
+ if (!Load || !Addr.match(Load->getAddr()) || !Load->isMasked() ||
+ !Mask.match(Load->getMask()))
+ return false;
+ return true;
+ }
+};
+
+/// Match a (possibly reversed) masked load.
+template <typename Addr_t, typename Mask_t>
+inline Load_match<Addr_t, Mask_t> m_MaskedLoad(const Addr_t &Addr,
+ const Mask_t &Mask) {
+ return Load_match<Addr_t, Mask_t>(Addr, Mask);
+}
+
+template <typename Addr_t, typename Val_t, typename Mask_t> struct Store_match {
+ Addr_t Addr;
+ Val_t Val;
+ Mask_t Mask;
+
+ Store_match(Addr_t Addr, Val_t Val, Mask_t Mask)
+ : Addr(Addr), Val(Val), Mask(Mask) {}
+
+ template <typename OpTy> bool match(const OpTy *V) const {
+ auto *Store = dyn_cast<VPWidenStoreRecipe>(V);
+ if (!Store || !Addr.match(Store->getAddr()) ||
+ !Val.match(Store->getStoredValue()) || !Store->isMasked() ||
+ !Mask.match(Store->getMask()))
+ return false;
+ return true;
+ }
+};
+
+/// Match a (possibly reversed) masked store.
+template <typename Addr_t, typename Val_t, typename Mask_t>
+inline Store_match<Addr_t, Val_t, Mask_t>
+m_MaskedStore(const Addr_t &Addr, const Val_t &Val, const Mask_t &Mask) {
+ return Store_match<Addr_t, Val_t, Mask_t>(Addr, Val, Mask);
+}
+
+template <typename Op0_t, typename Op1_t>
+using VectorEndPointerRecipe_match =
+ Recipe_match<std::tuple<Op0_t, Op1_t>, 0,
+ /*Commutative*/ false, VPVectorEndPointerRecipe>;
+
+template <typename Op0_t, typename Op1_t>
+VectorEndPointerRecipe_match<Op0_t, Op1_t> m_VecEndPtr(const Op0_t &Op0,
+ const Op1_t &Op1) {
+ return VectorEndPointerRecipe_match<Op0_t, Op1_t>(Op0, Op1);
+}
+
/// Match a call argument at a given argument index.
template <typename Opnd_t> struct Argument_match {
/// Call argument index to match.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index f9c15a3..1ee405a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -162,8 +162,12 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPPredInstPHISC:
case VPVectorEndPointerSC:
return false;
- case VPInstructionSC:
- return mayWriteToMemory();
+ case VPInstructionSC: {
+ auto *VPI = cast<VPInstruction>(this);
+ return mayWriteToMemory() ||
+ VPI->getOpcode() == VPInstruction::BranchOnCount ||
+ VPI->getOpcode() == VPInstruction::BranchOnCond;
+ }
case VPWidenCallSC: {
Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
@@ -490,10 +494,10 @@ template class VPUnrollPartAccessor<3>;
}
VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
- const VPIRFlags &Flags, DebugLoc DL,
- const Twine &Name)
+ const VPIRFlags &Flags, const VPIRMetadata &MD,
+ DebugLoc DL, const Twine &Name)
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, Flags, DL),
- VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {
+ VPIRMetadata(MD), Opcode(Opcode), Name(Name.str()) {
assert(flagsValidForOpcode(getOpcode()) &&
"Set flags not supported for the provided opcode");
assert((getNumOperandsForOpcode(Opcode) == -1u ||
@@ -1241,6 +1245,8 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case Instruction::Select:
case Instruction::PHI:
case VPInstruction::AnyOf:
+ case VPInstruction::BranchOnCond:
+ case VPInstruction::BranchOnCount:
case VPInstruction::Broadcast:
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.h b/llvm/lib/Transforms/Vectorize/VPlanSLP.h
index 77ff36c..44972c68 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanSLP.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.h
@@ -89,8 +89,7 @@ class VPlanSlp {
/// Width of the widest combined bundle in bits.
unsigned WidestBundleBits = 0;
- using MultiNodeOpTy =
- typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
+ using MultiNodeOpTy = std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
// Input operand bundles for the current multi node. Each multi node operand
// bundle contains values not matching the multi node's opcode. They will
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d491d56..9d9bb14 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -151,7 +151,27 @@ static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
static bool sinkScalarOperands(VPlan &Plan) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
+ bool ScalarVFOnly = Plan.hasScalarVFOnly();
bool Changed = false;
+
+ auto IsValidSinkCandidate = [ScalarVFOnly](VPBasicBlock *SinkTo,
+ VPSingleDefRecipe *Candidate) {
+ // We only know how to duplicate VPReplicateRecipes and
+ // VPScalarIVStepsRecipes for now.
+ if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate))
+ return false;
+
+ if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
+ Candidate->mayReadOrWriteMemory())
+ return false;
+
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate))
+ if (!ScalarVFOnly && RepR->isSingleScalar())
+ return false;
+
+ return true;
+ };
+
// First, collect the operands of all recipes in replicate blocks as seeds for
// sinking.
SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
@@ -159,51 +179,37 @@ static bool sinkScalarOperands(VPlan &Plan) {
VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
continue;
- VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
- if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
+ VPBasicBlock *VPBB = cast<VPBasicBlock>(EntryVPBB->getSuccessors().front());
+ if (VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
continue;
for (auto &Recipe : *VPBB) {
- for (VPValue *Op : Recipe.operands())
+ for (VPValue *Op : Recipe.operands()) {
if (auto *Def =
dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
- WorkList.insert({VPBB, Def});
+ if (IsValidSinkCandidate(VPBB, Def))
+ WorkList.insert({VPBB, Def});
+ }
}
}
- bool ScalarVFOnly = Plan.hasScalarVFOnly();
// Try to sink each replicate or scalar IV steps recipe in the worklist.
for (unsigned I = 0; I != WorkList.size(); ++I) {
VPBasicBlock *SinkTo;
VPSingleDefRecipe *SinkCandidate;
std::tie(SinkTo, SinkCandidate) = WorkList[I];
- if (SinkCandidate->getParent() == SinkTo ||
- SinkCandidate->mayHaveSideEffects() ||
- SinkCandidate->mayReadOrWriteMemory())
- continue;
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
- if (!ScalarVFOnly && RepR->isSingleScalar())
- continue;
- } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
- continue;
- bool NeedsDuplicating = false;
// All recipe users of the sink candidate must be in the same block SinkTo
- // or all users outside of SinkTo must be uniform-after-vectorization (
- // i.e., only first lane is used) . In the latter case, we need to duplicate
- // SinkCandidate.
- auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
- SinkCandidate](VPUser *U) {
- auto *UI = cast<VPRecipeBase>(U);
- if (UI->getParent() == SinkTo)
- return true;
- NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
- // We only know how to duplicate VPReplicateRecipes and
- // VPScalarIVStepsRecipes for now.
- return NeedsDuplicating &&
- isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate);
- };
- if (!all_of(SinkCandidate->users(), CanSinkWithUser))
+ // or all users outside of SinkTo must have only their first lane used. In
+ // the latter case, we need to duplicate SinkCandidate.
+ auto UsersOutsideSinkTo =
+ make_filter_range(SinkCandidate->users(), [SinkTo](VPUser *U) {
+ return cast<VPRecipeBase>(U)->getParent() != SinkTo;
+ });
+ if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
+ return !U->onlyFirstLaneUsed(SinkCandidate);
+ }))
continue;
+ bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
if (NeedsDuplicating) {
if (ScalarVFOnly)
@@ -230,7 +236,8 @@ static bool sinkScalarOperands(VPlan &Plan) {
for (VPValue *Op : SinkCandidate->operands())
if (auto *Def =
dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
- WorkList.insert({SinkTo, Def});
+ if (IsValidSinkCandidate(SinkTo, Def))
+ WorkList.insert({SinkTo, Def});
Changed = true;
}
return Changed;
@@ -699,8 +706,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
continue;
const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
- VPValue *StartV =
- Plan.getOrAddLiveIn(ConstantInt::get(ID.getStep()->getType(), 0));
+ VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0);
VPValue *StepV = PtrIV->getOperand(1);
VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
@@ -836,7 +842,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan,
// changed it means the exit is using the incremented value, so we need to
// add the step.
if (Incoming != WideIV) {
- VPValue *One = Plan.getOrAddLiveIn(ConstantInt::get(CanonicalIVType, 1));
+ VPValue *One = Plan.getConstantInt(CanonicalIVType, 1);
EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);
}
@@ -882,7 +888,7 @@ static VPValue *optimizeLatchExitInductionUser(
return B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
if (ScalarTy->isPointerTy()) {
Type *StepTy = TypeInfo.inferScalarType(Step);
- auto *Zero = Plan.getOrAddLiveIn(ConstantInt::get(StepTy, 0));
+ auto *Zero = Plan.getConstantInt(StepTy, 0);
return B.createPtrAdd(EndValue,
B.createNaryOp(Instruction::Sub, {Zero, Step}),
DebugLoc::getUnknown(), "ind.escape");
@@ -1057,13 +1063,9 @@ static VPValue *tryToFoldLiveIns(VPSingleDefRecipe &R,
return nullptr;
}
-/// Try to simplify recipe \p R.
-static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
- VPlan *Plan = R.getParent()->getPlan();
-
- auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
- if (!Def)
- return;
+/// Try to simplify VPSingleDefRecipe \p Def.
+static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
+ VPlan *Plan = Def->getParent()->getPlan();
// Simplification of live-in IR values for SingleDef recipes using
// InstSimplifyFolder.
@@ -1073,7 +1075,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(V);
// Fold PredPHI LiveIn -> LiveIn.
- if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(&R)) {
+ if (auto *PredPHI = dyn_cast<VPPredInstPHIRecipe>(Def)) {
VPValue *Op = PredPHI->getOperand(0);
if (Op->isLiveIn())
PredPHI->replaceAllUsesWith(Op);
@@ -1092,12 +1094,12 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {
- unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
+ unsigned ExtOpcode = match(Def->getOperand(0), m_SExt(m_VPValue()))
? Instruction::SExt
: Instruction::ZExt;
auto *Ext = Builder.createWidenCast(Instruction::CastOps(ExtOpcode), A,
TruncTy);
- if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
+ if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
// UnderlyingExt has distinct return type, used to retain legacy cost.
Ext->setUnderlyingValue(UnderlyingExt);
}
@@ -1160,7 +1162,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));
// x && !x -> 0
- if (match(&R, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
+ if (match(Def, m_LogicalAnd(m_VPValue(X), m_Not(m_Deferred(X)))))
return Def->replaceAllUsesWith(Plan->getFalse());
if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X))))
@@ -1188,8 +1190,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return Def->replaceAllUsesWith(A);
if (match(Def, m_c_Mul(m_VPValue(A), m_ZeroInt())))
- return Def->replaceAllUsesWith(R.getOperand(0) == A ? R.getOperand(1)
- : R.getOperand(0));
+ return Def->replaceAllUsesWith(
+ Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));
if (match(Def, m_Not(m_VPValue(A)))) {
if (match(A, m_Not(m_VPValue(A))))
@@ -1218,8 +1220,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
// If Cmp doesn't have a debug location, use the one from the negation,
// to preserve the location.
- if (!Cmp->getDebugLoc() && R.getDebugLoc())
- Cmp->setDebugLoc(R.getDebugLoc());
+ if (!Cmp->getDebugLoc() && Def->getDebugLoc())
+ Cmp->setDebugLoc(Def->getDebugLoc());
}
}
}
@@ -1245,7 +1247,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (match(Def, m_Intrinsic<Intrinsic::vp_merge>(m_True(), m_VPValue(A),
m_VPValue(X), m_VPValue())) &&
match(A, m_c_BinaryOr(m_Specific(X), m_VPValue(Y))) &&
- TypeInfo.inferScalarType(R.getVPSingleValue())->isIntegerTy(1)) {
+ TypeInfo.inferScalarType(Def)->isIntegerTy(1)) {
Def->setOperand(1, Def->getOperand(0));
Def->setOperand(0, Y);
return;
@@ -1253,35 +1255,41 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (auto *Phi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(Def)) {
if (Phi->getOperand(0) == Phi->getOperand(1))
- Def->replaceAllUsesWith(Phi->getOperand(0));
+ Phi->replaceAllUsesWith(Phi->getOperand(0));
return;
}
// Look through ExtractLastElement (BuildVector ....).
- if (match(&R, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
- m_ExtractLastLanePerPart(m_BuildVector())))) {
- auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ if (match(Def, m_CombineOr(m_ExtractLastElement(m_BuildVector()),
+ m_ExtractLastLanePerPart(m_BuildVector())))) {
+ auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 1));
return;
}
// Look through ExtractPenultimateElement (BuildVector ....).
- if (match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
- m_BuildVector()))) {
- auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ if (match(Def, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
+ m_BuildVector()))) {
+ auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(
BuildVector->getOperand(BuildVector->getNumOperands() - 2));
return;
}
uint64_t Idx;
- if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
- auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ if (match(Def, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
+ auto *BuildVector = cast<VPInstruction>(Def->getOperand(0));
Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
return;
}
+ if (match(Def, m_BuildVector()) && all_equal(Def->operands())) {
+ Def->replaceAllUsesWith(
+ Builder.createNaryOp(VPInstruction::Broadcast, Def->getOperand(0)));
+ return;
+ }
+
if (auto *Phi = dyn_cast<VPPhi>(Def)) {
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
@@ -1298,7 +1306,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
isa<VPPhi>(X)) {
auto *Phi = cast<VPPhi>(X);
if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
- Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
+ Phi->getNumUsers() == 1 && (*Phi->user_begin() == Def)) {
Phi->setOperand(0, Y);
Def->replaceAllUsesWith(Phi);
return;
@@ -1306,7 +1314,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
// VPVectorPointer for part 0 can be replaced by their start pointer.
- if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
+ if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(Def)) {
if (VecPtr->isFirstPart()) {
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
return;
@@ -1361,9 +1369,9 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
Plan.getEntry());
VPTypeAnalysis TypeInfo(Plan);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
- for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
- simplifyRecipe(R, TypeInfo);
- }
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB))
+ if (auto *Def = dyn_cast<VPSingleDefRecipe>(&R))
+ simplifyRecipe(Def, TypeInfo);
}
}
@@ -1574,9 +1582,9 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
continue;
// Update IV operands and comparison bound to use new narrower type.
- auto *NewStart = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 0));
+ auto *NewStart = Plan.getConstantInt(NewIVTy, 0);
WideIV->setStartValue(NewStart);
- auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
+ auto *NewStep = Plan.getConstantInt(NewIVTy, 1);
WideIV->setStepValue(NewStep);
auto *NewBTC = new VPWidenCastRecipe(
@@ -1695,8 +1703,7 @@ static bool tryToReplaceALMWithWideALM(VPlan &Plan, ElementCount VF,
// When using wide lane masks, the return type of the get.active.lane.mask
// intrinsic is VF x UF (last operand).
- VPValue *ALMMultiplier =
- Plan.getOrAddLiveIn(ConstantInt::get(IntegerType::getInt64Ty(Ctx), UF));
+ VPValue *ALMMultiplier = Plan.getConstantInt(64, UF);
EntryALM->setOperand(2, ALMMultiplier);
LoopALM->setOperand(2, ALMMultiplier);
@@ -2403,7 +2410,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
// Create the active lane mask instruction in the VPlan preheader.
VPValue *ALMMultiplier =
- Plan.getOrAddLiveIn(ConstantInt::get(TopRegion->getCanonicalIVType(), 1));
+ Plan.getConstantInt(TopRegion->getCanonicalIVType(), 1);
auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
{EntryIncrement, TC, ALMMultiplier}, DL,
"active.lane.mask.entry");
@@ -2517,90 +2524,102 @@ void VPlanTransforms::addActiveLaneMask(
HeaderMask->eraseFromParent();
}
+template <typename Op0_t, typename Op1_t> struct RemoveMask_match {
+ Op0_t In;
+ Op1_t &Out;
+
+ RemoveMask_match(const Op0_t &In, Op1_t &Out) : In(In), Out(Out) {}
+
+ template <typename OpTy> bool match(OpTy *V) const {
+ if (m_Specific(In).match(V)) {
+ Out = nullptr;
+ return true;
+ }
+ if (m_LogicalAnd(m_Specific(In), m_VPValue(Out)).match(V))
+ return true;
+ return false;
+ }
+};
+
+/// Match a specific mask \p In, or a combination of it (logical-and In, Out).
+/// Binds the remaining part to \p Out if so (nullptr when \p In matched exactly).
+template <typename Op0_t, typename Op1_t>
+static inline RemoveMask_match<Op0_t, Op1_t> m_RemoveMask(const Op0_t &In,
+ Op1_t &Out) {
+ return RemoveMask_match<Op0_t, Op1_t>(In, Out);
+}
+
/// Try to optimize a \p CurRecipe masked by \p HeaderMask to a corresponding
/// EVL-based recipe without the header mask. Returns nullptr if no EVL-based
/// recipe could be created.
/// \p HeaderMask Header Mask.
/// \p CurRecipe Recipe to be transform.
/// \p TypeInfo VPlan-based type analysis.
-/// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
/// \p EVL The explicit vector length parameter of vector-predication
/// intrinsics.
static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
VPRecipeBase &CurRecipe,
- VPTypeAnalysis &TypeInfo,
- VPValue &AllOneMask, VPValue &EVL) {
- // FIXME: Don't transform recipes to EVL recipes if they're not masked by the
- // header mask.
- auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
- assert(OrigMask && "Unmasked recipe when folding tail");
- // HeaderMask will be handled using EVL.
- VPValue *Mask;
- if (match(OrigMask, m_LogicalAnd(m_Specific(HeaderMask), m_VPValue(Mask))))
- return Mask;
- return HeaderMask == OrigMask ? nullptr : OrigMask;
- };
+ VPTypeAnalysis &TypeInfo, VPValue &EVL) {
+ VPlan *Plan = CurRecipe.getParent()->getPlan();
+ VPValue *Addr, *Mask, *EndPtr;
/// Adjust any end pointers so that they point to the end of EVL lanes not VF.
- auto GetNewAddr = [&CurRecipe, &EVL](VPValue *Addr) -> VPValue * {
- auto *EndPtr = dyn_cast<VPVectorEndPointerRecipe>(Addr);
- if (!EndPtr)
- return Addr;
- assert(EndPtr->getOperand(1) == &EndPtr->getParent()->getPlan()->getVF() &&
- "VPVectorEndPointerRecipe with non-VF VF operand?");
- assert(
- all_of(EndPtr->users(),
- [](VPUser *U) {
- return cast<VPWidenMemoryRecipe>(U)->isReverse();
- }) &&
- "VPVectorEndPointRecipe not used by reversed widened memory recipe?");
- VPVectorEndPointerRecipe *EVLAddr = EndPtr->clone();
- EVLAddr->insertBefore(&CurRecipe);
- EVLAddr->setOperand(1, &EVL);
- return EVLAddr;
+ auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {
+ auto *EVLEndPtr = cast<VPVectorEndPointerRecipe>(EndPtr)->clone();
+ EVLEndPtr->insertBefore(&CurRecipe);
+ EVLEndPtr->setOperand(1, &EVL);
+ return EVLEndPtr;
};
- return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe)
- .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
- VPValue *NewMask = GetNewMask(L->getMask());
- VPValue *NewAddr = GetNewAddr(L->getAddr());
- return new VPWidenLoadEVLRecipe(*L, NewAddr, EVL, NewMask);
- })
- .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
- VPValue *NewMask = GetNewMask(S->getMask());
- VPValue *NewAddr = GetNewAddr(S->getAddr());
- return new VPWidenStoreEVLRecipe(*S, NewAddr, EVL, NewMask);
- })
- .Case<VPInterleaveRecipe>([&](VPInterleaveRecipe *IR) {
- VPValue *NewMask = GetNewMask(IR->getMask());
- return new VPInterleaveEVLRecipe(*IR, EVL, NewMask);
- })
- .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
- VPValue *NewMask = GetNewMask(Red->getCondOp());
- return new VPReductionEVLRecipe(*Red, EVL, NewMask);
- })
- .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
- VPValue *LHS, *RHS;
- // Transform select with a header mask condition
- // select(header_mask, LHS, RHS)
- // into vector predication merge.
- // vp.merge(all-true, LHS, RHS, EVL)
- if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
- m_VPValue(RHS))))
- return nullptr;
- // Use all true as the condition because this transformation is
- // limited to selects whose condition is a header mask.
- return new VPWidenIntrinsicRecipe(
- Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
- TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
- })
- .Default([&](VPRecipeBase *R) { return nullptr; });
+ if (match(&CurRecipe,
+ m_MaskedLoad(m_VPValue(Addr), m_RemoveMask(HeaderMask, Mask))) &&
+ !cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+ return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe), Addr,
+ EVL, Mask);
+
+ if (match(&CurRecipe,
+ m_MaskedLoad(m_VPValue(EndPtr), m_RemoveMask(HeaderMask, Mask))) &&
+ match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+ cast<VPWidenLoadRecipe>(CurRecipe).isReverse())
+ return new VPWidenLoadEVLRecipe(cast<VPWidenLoadRecipe>(CurRecipe),
+ AdjustEndPtr(EndPtr), EVL, Mask);
+
+ if (match(&CurRecipe, m_MaskedStore(m_VPValue(Addr), m_VPValue(),
+ m_RemoveMask(HeaderMask, Mask))) &&
+ !cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+ return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe), Addr,
+ EVL, Mask);
+
+ if (match(&CurRecipe, m_MaskedStore(m_VPValue(EndPtr), m_VPValue(),
+ m_RemoveMask(HeaderMask, Mask))) &&
+ match(EndPtr, m_VecEndPtr(m_VPValue(Addr), m_Specific(&Plan->getVF()))) &&
+ cast<VPWidenStoreRecipe>(CurRecipe).isReverse())
+ return new VPWidenStoreEVLRecipe(cast<VPWidenStoreRecipe>(CurRecipe),
+ AdjustEndPtr(EndPtr), EVL, Mask);
+
+ if (auto *Rdx = dyn_cast<VPReductionRecipe>(&CurRecipe))
+ if (Rdx->isConditional() &&
+ match(Rdx->getCondOp(), m_RemoveMask(HeaderMask, Mask)))
+ return new VPReductionEVLRecipe(*Rdx, EVL, Mask);
+
+ if (auto *Interleave = dyn_cast<VPInterleaveRecipe>(&CurRecipe))
+ if (Interleave->getMask() &&
+ match(Interleave->getMask(), m_RemoveMask(HeaderMask, Mask)))
+ return new VPInterleaveEVLRecipe(*Interleave, EVL, Mask);
+
+ VPValue *LHS, *RHS;
+ if (match(&CurRecipe,
+ m_Select(m_Specific(HeaderMask), m_VPValue(LHS), m_VPValue(RHS))))
+ return new VPWidenIntrinsicRecipe(
+ Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL},
+ TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc());
+
+ return nullptr;
}
/// Replace recipes with their EVL variants.
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPTypeAnalysis TypeInfo(Plan);
- VPValue *AllOneMask = Plan.getTrue();
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
@@ -2660,7 +2679,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
Intrinsic::experimental_vp_splice,
- {V1, V2, Imm, AllOneMask, PrevEVL, &EVL},
+ {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
TypeInfo.inferScalarType(R.getVPSingleValue()), R.getDebugLoc());
VPSplice->insertBefore(&R);
R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
@@ -2694,7 +2713,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
for (VPUser *U : collectUsersRecursively(EVLMask)) {
auto *CurRecipe = cast<VPRecipeBase>(U);
VPRecipeBase *EVLRecipe =
- optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
+ optimizeMaskToEVL(EVLMask, *CurRecipe, TypeInfo, EVL);
if (!EVLRecipe)
continue;
@@ -2790,8 +2809,7 @@ void VPlanTransforms::addExplicitVectorLength(
if (MaxSafeElements) {
// Support for MaxSafeDist for correct loop emission.
- VPValue *AVLSafe =
- Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, *MaxSafeElements));
+ VPValue *AVLSafe = Plan.getConstantInt(CanIVTy, *MaxSafeElements);
VPValue *Cmp = Builder.createICmp(ICmpInst::ICMP_ULT, AVL, AVLSafe);
AVL = Builder.createSelect(Cmp, AVL, AVLSafe, DebugLoc::getUnknown(),
"safe_avl");
@@ -2904,9 +2922,8 @@ void VPlanTransforms::canonicalizeEVLLoops(VPlan &Plan) {
Type *AVLTy = VPTypeAnalysis(Plan).inferScalarType(AVLNext);
VPBuilder Builder(LatchExitingBr);
- VPValue *Cmp =
- Builder.createICmp(CmpInst::ICMP_EQ, AVLNext,
- Plan.getOrAddLiveIn(ConstantInt::getNullValue(AVLTy)));
+ VPValue *Cmp = Builder.createICmp(CmpInst::ICMP_EQ, AVLNext,
+ Plan.getConstantInt(AVLTy, 0));
Builder.createNaryOp(VPInstruction::BranchOnCond, Cmp);
LatchExitingBr->eraseFromParent();
}
@@ -2930,8 +2947,7 @@ void VPlanTransforms::replaceSymbolicStrides(
// Only handle constant strides for now.
continue;
- auto *CI =
- Plan.getOrAddLiveIn(ConstantInt::get(Stride->getType(), *StrideConst));
+ auto *CI = Plan.getConstantInt(*StrideConst);
if (VPValue *StrideVPV = Plan.getLiveIn(StrideV))
StrideVPV->replaceUsesWithIf(CI, CanUseVersionedStride);
@@ -2946,7 +2962,7 @@ void VPlanTransforms::replaceSymbolicStrides(
unsigned BW = U->getType()->getScalarSizeInBits();
APInt C =
isa<SExtInst>(U) ? StrideConst->sext(BW) : StrideConst->zext(BW);
- VPValue *CI = Plan.getOrAddLiveIn(ConstantInt::get(U->getType(), C));
+ VPValue *CI = Plan.getConstantInt(C);
StrideVPV->replaceUsesWithIf(CI, CanUseVersionedStride);
}
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
@@ -3125,8 +3141,7 @@ void VPlanTransforms::createInterleaveGroups(
DL.getTypeAllocSize(getLoadStoreType(IRInsertPos)) *
IG->getIndex(IRInsertPos),
/*IsSigned=*/true);
- VPValue *OffsetVPV =
- Plan.getOrAddLiveIn(ConstantInt::get(Plan.getContext(), -Offset));
+ VPValue *OffsetVPV = Plan.getConstantInt(-Offset);
VPBuilder B(InsertPos);
Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
}
@@ -3867,8 +3882,7 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
VPBuilder Builder(VectorPH, VectorPH->begin());
auto *TCTy = VPTypeAnalysis(Plan).inferScalarType(Plan.getTripCount());
auto *TCMO = Builder.createNaryOp(
- Instruction::Sub,
- {Plan.getTripCount(), Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 1))},
+ Instruction::Sub, {Plan.getTripCount(), Plan.getConstantInt(TCTy, 1)},
DebugLoc::getCompilerGenerated(), "trip.count.minus.1");
BTC->replaceAllUsesWith(TCMO);
}
@@ -3993,9 +4007,8 @@ void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
if (TailByMasking) {
TC = Builder.createNaryOp(
Instruction::Add,
- {TC, Builder.createNaryOp(
- Instruction::Sub,
- {Step, Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 1))})},
+ {TC, Builder.createNaryOp(Instruction::Sub,
+ {Step, Plan.getConstantInt(TCTy, 1)})},
DebugLoc::getCompilerGenerated(), "n.rnd.up");
}
@@ -4017,8 +4030,8 @@ void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
if (RequiresScalarEpilogue) {
assert(!TailByMasking &&
"requiring scalar epilogue is not supported with fail folding");
- VPValue *IsZero = Builder.createICmp(
- CmpInst::ICMP_EQ, R, Plan.getOrAddLiveIn(ConstantInt::get(TCTy, 0)));
+ VPValue *IsZero =
+ Builder.createICmp(CmpInst::ICMP_EQ, R, Plan.getConstantInt(TCTy, 0));
R = Builder.createSelect(IsZero, Step, R);
}
@@ -4056,7 +4069,7 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
}
VF.replaceAllUsesWith(RuntimeVF);
- VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
+ VPValue *UF = Plan.getConstantInt(TCTy, Plan.getUF());
VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
VFxUF.replaceAllUsesWith(MulByUF);
}
@@ -4176,7 +4189,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
unsigned VFMinVal = VF.getKnownMinValue();
SmallVector<VPInterleaveRecipe *> StoreGroups;
for (auto &R : *VectorLoop->getEntryBasicBlock()) {
- if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount()))
+ if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
if (isa<VPDerivedIVRecipe, VPScalarIVStepsRecipe>(&R) &&
@@ -4346,7 +4359,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
} else {
Inc->setOperand(1, UF);
Plan.getVF().replaceAllUsesWith(
- Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
+ Plan.getConstantInt(CanIV->getScalarType(), 1));
}
removeDeadRecipes(Plan);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index f15113c..d6a0028 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -68,9 +68,9 @@ class UnrollState {
void unrollWidenInductionByUF(VPWidenInductionRecipe *IV,
VPBasicBlock::iterator InsertPtForPhi);
- VPValue *getConstantVPV(unsigned Part) {
+ VPValue *getConstantInt(unsigned Part) {
Type *CanIVIntTy = Plan.getVectorLoopRegion()->getCanonicalIVType();
- return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
+ return Plan.getConstantInt(CanIVIntTy, Part);
}
public:
@@ -137,7 +137,7 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
remapOperands(&PartIR, Part);
if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
- ScalarIVSteps->addOperand(getConstantVPV(Part));
+ ScalarIVSteps->addOperand(getConstantInt(Part));
}
addRecipeForPart(&Part0R, &PartIR, Part);
@@ -249,7 +249,7 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
for (unsigned Part = 1; Part != UF; ++Part)
VPV2Parts[VPI][Part - 1] = StartV;
}
- Copy->addOperand(getConstantVPV(Part));
+ Copy->addOperand(getConstantInt(Part));
} else {
assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
"unexpected header phi recipe not needing unrolled part");
@@ -318,7 +318,7 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
match(Copy,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
- Copy->addOperand(getConstantVPV(Part));
+ Copy->addOperand(getConstantInt(Part));
if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
@@ -474,8 +474,7 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
if (LaneDefs != Def2LaneDefs.end())
return LaneDefs->second[Lane.getKnownLane()];
- VPValue *Idx =
- Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
+ VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
}
@@ -509,8 +508,7 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
continue;
}
- VPValue *Idx =
- Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
+ VPValue *Idx = Plan.getConstantInt(IdxTy, Lane.getKnownLane());
VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
NewOps.push_back(Ext);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 8c23e78..c6380d3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -32,22 +32,17 @@ bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
}
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
- VPValue *Expanded = nullptr;
if (auto *E = dyn_cast<SCEVConstant>(Expr))
- Expanded = Plan.getOrAddLiveIn(E->getValue());
- else {
- auto *U = dyn_cast<SCEVUnknown>(Expr);
- // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
- // value. Otherwise the value may be defined in a loop and using it directly
- // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
- // form.
- if (U && !isa<Instruction>(U->getValue())) {
- Expanded = Plan.getOrAddLiveIn(U->getValue());
- } else {
- Expanded = new VPExpandSCEVRecipe(Expr);
- Plan.getEntry()->appendRecipe(Expanded->getDefiningRecipe());
- }
- }
+ return Plan.getOrAddLiveIn(E->getValue());
+ // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
+ // value. Otherwise the value may be defined in a loop and using it directly
+ // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
+ // form.
+ auto *U = dyn_cast<SCEVUnknown>(Expr);
+ if (U && !isa<Instruction>(U->getValue()))
+ return Plan.getOrAddLiveIn(U->getValue());
+ auto *Expanded = new VPExpandSCEVRecipe(Expr);
+ Plan.getEntry()->appendRecipe(Expanded);
return Expanded;
}