aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@outlook.com>2026-04-29 11:48:57 -0400
committerGitHub <noreply@github.com>2026-04-29 11:48:57 -0400
commit2be72ed78188847a8d9e3065d02e2c60245b78d8 (patch)
tree44387e0925aa4a901eb6512f66fbecfb2c5f870a
parentb46904a0326ac685e970b1a8a3f576b865098763 (diff)
downloadllvm-main.tar.gz
llvm-main.tar.bz2
llvm-main.zip
[SLP][NFC] Reduce compile time of isTreeTinyAndNotFullyVectorizableHEADmain
Cache root entry and SLPCostThreshold queries once, group !ForReduction-only checks under two blocks, extract a shared benign-node predicate from the two duplicated lambdas, and skip HasSingleLoad and allConstant work when results are dead. Reviewers: Pull Request: https://github.com/llvm/llvm-project/pull/194895
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp466
1 files changed, 268 insertions, 198 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3a6b3dd28b73..f631ec963d49 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17527,34 +17527,50 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
return true;
}
- if (VectorizableTree.size() == 1 && !ForReduction &&
- VectorizableTree.front()->isGather() &&
- VectorizableTree.front()->hasState() &&
- VectorizableTree.front()->getOpcode() == Instruction::ExtractElement)
- return true;
- if (VectorizableTree.size() == 1 && !ForReduction &&
- VectorizableTree.front()->isGather() &&
- any_of(VectorizableTree.front()->Scalars, IsaPred<Instruction>))
- return true;
- if (VectorizableTree.size() <= MinTreeSize && !ForReduction &&
- all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
- return TE->isGather() || TE->State == TreeEntry::SplitVectorize;
- }))
- return true;
- if (VectorizableTree.size() == 1 && !ForReduction && SLPCostThreshold < 0 &&
- VectorizableTree.front()->hasState() &&
- VectorizableTree.front()->getOpcode() == Instruction::ExtractElement &&
- (VectorizableTree.front()->getVectorFactor() == 2 ||
- all_of(
- VectorizableTree.front()->Scalars,
- [&](Value *V) {
- auto *I = dyn_cast<Instruction>(V);
- return !I || !areAllUsersVectorized(I, UserIgnoreList);
- })))
- return true;
+ // Cache values from the root node and the cost-threshold options to avoid
+ // re-querying them inside hot predicates below.
+ const unsigned TreeSize = VectorizableTree.size();
+ const TreeEntry &Front = *VectorizableTree.front();
+ const bool FrontIsGather = Front.isGather();
+ const bool FrontHasState = Front.hasState();
+ const unsigned FrontOpcode = FrontHasState ? Front.getOpcode() : 0u;
+ const bool ThresholdSet = SLPCostThreshold.getNumOccurrences() > 0;
+ const bool ThresholdNonNegative = SLPCostThreshold >= 0;
+
+ constexpr int Limit = 4;
+ constexpr unsigned LargeTree = 20;
+ constexpr int LimitTreeSize = 36;
+
+ // The remaining size-1/size-<=MinTreeSize early bail-outs only apply to
+ // non-reduction trees; group them under a single guard to avoid 3 separate
+ // !ForReduction short-circuits when reducing.
+ if (!ForReduction) {
+ // Single gather node: bail out for ExtractElement or any node containing a
+ // real Instruction scalar.
+ if (TreeSize == 1 && FrontIsGather) {
+ if (FrontHasState && FrontOpcode == Instruction::ExtractElement)
+ return true;
+ if (any_of(Front.Scalars, IsaPred<Instruction>))
+ return true;
+ }
+ if (TreeSize <= MinTreeSize &&
+ all_of(VectorizableTree, [](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->isGather() || TE->State == TreeEntry::SplitVectorize;
+ }))
+ return true;
+ if (TreeSize == 1 && SLPCostThreshold < 0 && FrontHasState &&
+ FrontOpcode == Instruction::ExtractElement &&
+ (Front.getVectorFactor() == 2 ||
+ all_of(
+ Front.Scalars,
+ [&](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ return !I || !areAllUsersVectorized(I, UserIgnoreList);
+ })))
+ return true;
+ }
// No need to vectorize inserts of gathered values.
- if (VectorizableTree.size() == 2 &&
- isa<InsertElementInst>(VectorizableTree[0]->Scalars[0]) &&
+ if (TreeSize == 2 && isa<InsertElementInst>(Front.Scalars[0]) &&
VectorizableTree[1]->isGather() &&
(VectorizableTree[1]->getVectorFactor() <= 2 ||
!(isSplat(VectorizableTree[1]->Scalars) ||
@@ -17563,11 +17579,10 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// The tree with only 3 nodes, where 2 last are gathers/buildvectors, not
// profitable for vectorization.
- constexpr int Limit = 4;
- if (VectorizableTree.size() == 3 && SLPCostThreshold == 0 &&
- (!ForReduction || VectorizableTree.front()->getVectorFactor() <= 2) &&
+ if (TreeSize == 3 && SLPCostThreshold == 0 &&
+ (!ForReduction || Front.getVectorFactor() <= 2) &&
all_of(ArrayRef(VectorizableTree).drop_front(),
- [&](const std::unique_ptr<TreeEntry> &TE) {
+ [](const std::unique_ptr<TreeEntry> &TE) {
return TE->isGather() && TE->getVectorFactor() <= Limit &&
!all_of(
TE->Scalars,
@@ -17575,161 +17590,218 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
}))
return true;
- // If the graph includes only PHI nodes and gathers, it is defnitely not
- // profitable for the vectorization, we can skip it, if the cost threshold is
- // default. The cost of vectorized PHI nodes is almost always 0 + the cost of
- // gathers/buildvectors.
- if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
- !VectorizableTree.empty() &&
- all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
- return (TE->isGather() &&
- (!TE->hasState() ||
- TE->getOpcode() != Instruction::ExtractElement) &&
- count_if(TE->Scalars, IsaPred<ExtractElementInst>) <= Limit) ||
- (TE->hasState() && TE->getOpcode() == Instruction::PHI);
- }))
- return true;
+ // All remaining bail-out heuristics require !ForReduction. Group them under
+ // a single guard so reduction trees skip them with one branch instead of one
+ // per check.
+ if (!ForReduction) {
+ // If the graph includes only PHI nodes and gathers, it is defnitely not
+ // profitable for the vectorization, we can skip it, if the cost threshold
+ // is default. The cost of vectorized PHI nodes is almost always 0 + the
+ // cost of gathers/buildvectors.
+ if (!ThresholdSet &&
+ all_of(VectorizableTree, [](const std::unique_ptr<TreeEntry> &TE) {
+ const bool IsGather = TE->isGather();
+ const bool HasState = TE->hasState();
+ const unsigned Op = HasState ? TE->getOpcode() : 0u;
+ if (IsGather && (!HasState || Op != Instruction::ExtractElement) &&
+ count_if(TE->Scalars, IsaPred<ExtractElementInst>) <= Limit)
+ return true;
+ return HasState && Op == Instruction::PHI;
+ }))
+ return true;
- // Do not vectorize small tree of phis only, if all vector phis are also
- // gathered.
- if (!ForReduction && SLPCostThreshold.getNumOccurrences() &&
- VectorizableTree.size() <= Limit &&
- all_of(VectorizableTree,
- [&](const std::unique_ptr<TreeEntry> &TE) {
- return (TE->isGather() &&
- (!TE->hasState() ||
- TE->getOpcode() != Instruction::ExtractElement) &&
- count_if(TE->Scalars, IsaPred<ExtractElementInst>) <=
- Limit) ||
- (TE->hasState() &&
- (TE->getOpcode() == Instruction::InsertElement ||
- (TE->getOpcode() == Instruction::PHI &&
- all_of(TE->Scalars, [&](Value *V) {
- return isa<PoisonValue>(V) || MustGather.contains(V);
- }))));
- }) &&
- any_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
- return TE->State == TreeEntry::Vectorize &&
- TE->getOpcode() == Instruction::PHI;
- }))
- return true;
+ // Do not vectorize small tree of phis only, if all vector phis are also
+ // gathered.
+ if (ThresholdSet && TreeSize <= Limit) {
+ bool HasVectorPhi = false;
+ auto Compatible = [&](const std::unique_ptr<TreeEntry> &TE) {
+ const bool IsGather = TE->isGather();
+ const bool HasState = TE->hasState();
+ const unsigned Op = HasState ? TE->getOpcode() : 0u;
+ if (IsGather && (!HasState || Op != Instruction::ExtractElement) &&
+ count_if(TE->Scalars, IsaPred<ExtractElementInst>) <= Limit)
+ return true;
+ if (!HasState)
+ return false;
+ if (Op == Instruction::InsertElement)
+ return true;
+ if (Op != Instruction::PHI)
+ return false;
+ if (TE->State == TreeEntry::Vectorize)
+ HasVectorPhi = true;
+ return all_of(TE->Scalars, [&](Value *V) {
+ return isa<PoisonValue>(V) || MustGather.contains(V);
+ });
+ };
+ if (all_of(VectorizableTree, Compatible) && HasVectorPhi)
+ return true;
+ }
- // PHI nodes only and gathers cannot be vectorized, skip.
- constexpr unsigned LargeTree = 20;
- bool HasSingleLoad = false;
- if (!ForReduction && SLPCostThreshold >= 0 &&
- all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
- bool PrevLoad = HasSingleLoad;
- HasSingleLoad |=
- TE->hasState() && !TE->isGather() &&
- (TE->getOpcode() == Instruction::Load ||
- TE->hasCopyableElements()) &&
- (TE->getVectorFactor() > 2 || TE->ReorderIndices.empty());
- return (TE->hasState() &&
- (TE->getOpcode() == Instruction::PHI ||
- (VectorizableTree.size() >= LargeTree &&
- (TE->getOpcode() == Instruction::Store ||
- (TE->getOpcode() == Instruction::Load && !PrevLoad)) &&
- TE->getVectorFactor() <= Limit))) ||
- (TE->isGather() &&
- (!TE->hasState() ||
- TE->getOpcode() != Instruction::ExtractElement));
- }))
- return true;
+ // PHI nodes only and gathers cannot be vectorized, skip.
+ if (ThresholdNonNegative) {
+ const bool IsLargeTree = TreeSize >= LargeTree;
+ bool HasSingleLoad = false;
+ if (all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
+ const bool IsGather = TE->isGather();
+ const bool HasState = TE->hasState();
+ const unsigned Op = HasState ? TE->getOpcode() : 0u;
+ // HasSingleLoad/PrevLoad are only consulted in the
+ // IsLargeTree branch; skip the bookkeeping otherwise.
+ if (IsLargeTree) {
+ const bool PrevLoad = HasSingleLoad;
+ HasSingleLoad |=
+ HasState && !IsGather &&
+ (Op == Instruction::Load || TE->hasCopyableElements()) &&
+ (TE->getVectorFactor() > 2 || TE->ReorderIndices.empty());
+ if (HasState) {
+ if (Op == Instruction::PHI)
+ return true;
+ if (TE->getVectorFactor() <= Limit &&
+ (Op == Instruction::Store ||
+ (Op == Instruction::Load && !PrevLoad)))
+ return true;
+ }
+ } else if (HasState && Op == Instruction::PHI) {
+ return true;
+ }
+ return IsGather && (!HasState || Op != Instruction::ExtractElement);
+ }))
+ return true;
- // Single non-phi vector node - skip the tree.
- bool VectorNodeFound = false;
- bool AnyNonConst = false;
- if (!ForReduction && SLPCostThreshold >= 0 && VectorizableTree.size() >= 5 &&
- VectorizableTree.front()->getVectorFactor() <= 2 &&
- VectorizableTree.front()->Scalars.front()->getType()->isIntegerTy() &&
- all_of(VectorizableTree,
- [&](const std::unique_ptr<TreeEntry> &TE) {
- if (TE->State == TreeEntry::Vectorize && TE->hasState()) {
- if (TE->hasState() && (TE->getOpcode() == Instruction::PHI ||
- !TE->ReorderIndices.empty()))
- return true;
- bool PrevVectorNodeFound = VectorNodeFound;
- VectorNodeFound = true;
- return !PrevVectorNodeFound;
- }
- AnyNonConst |= !allConstant(TE->Scalars);
- return TE->isGather() || TE->State == TreeEntry::SplitVectorize;
- }) &&
- AnyNonConst)
- return true;
+ // Single non-phi vector node - skip the tree.
+ if (TreeSize >= 5 && Front.getVectorFactor() <= 2 &&
+ Front.Scalars.front()->getType()->isIntegerTy()) {
+ bool VectorNodeFound = false;
+ bool AnyNonConst = false;
+ if (all_of(VectorizableTree,
+ [&](const std::unique_ptr<TreeEntry> &TE) {
+ if (TE->State == TreeEntry::Vectorize && TE->hasState()) {
+ const unsigned Op = TE->getOpcode();
+ if (Op == Instruction::PHI ||
+ !TE->ReorderIndices.empty())
+ return true;
+ if (VectorNodeFound)
+ return false;
+ VectorNodeFound = true;
+ return true;
+ }
+ // Once AnyNonConst is true, skip the O(n) allConstant
+ // walk for subsequent entries.
+ if (!AnyNonConst)
+ AnyNonConst = !allConstant(TE->Scalars);
+ return TE->isGather() ||
+ TE->State == TreeEntry::SplitVectorize;
+ }) &&
+ AnyNonConst)
+ return true;
+ }
+ }
- // If the tree contains only phis, buildvectors, split nodes and
- // small nodes with reuses, we can skip it.
- SmallVector<const TreeEntry *> StoreLoadNodes;
- unsigned NumGathers = 0;
- constexpr int LimitTreeSize = 36;
- if (!ForReduction && !SLPCostThreshold.getNumOccurrences() &&
- all_of(VectorizableTree,
- [&](const std::unique_ptr<TreeEntry> &TE) {
- if (!TE->isGather() && TE->hasState() &&
- (TE->getOpcode() == Instruction::Load ||
- TE->getOpcode() == Instruction::Store)) {
- StoreLoadNodes.push_back(TE.get());
- return true;
- }
- if (TE->isGather())
- ++NumGathers;
- return TE->State == TreeEntry::SplitVectorize ||
- (TE->Idx == 0 && TE->Scalars.size() == 2 &&
- TE->hasState() && TE->getOpcode() == Instruction::ICmp &&
- VectorizableTree.size() > LimitTreeSize) ||
- (TE->isGather() &&
- none_of(TE->Scalars, IsaPred<ExtractElementInst>)) ||
- (TE->hasState() &&
- (TE->getOpcode() == Instruction::PHI ||
- (TE->hasCopyableElements() &&
- static_cast<unsigned>(count_if(
- TE->Scalars, IsaPred<PHINode, Constant>)) >=
- TE->Scalars.size() / 2) ||
- ((!TE->ReuseShuffleIndices.empty() ||
- !TE->ReorderIndices.empty() || TE->isAltShuffle()) &&
- TE->Scalars.size() == 2)));
- }) &&
- (StoreLoadNodes.empty() ||
- (VectorizableTree.size() > LimitTreeSize * StoreLoadNodes.size() &&
- (NumGathers > 0 || none_of(StoreLoadNodes, [&](const TreeEntry *TE) {
- return TE->getOpcode() == Instruction::Store ||
- all_of(TE->Scalars, [&](Value *V) {
- return !isa<LoadInst>(V) ||
- areAllUsersVectorized(cast<Instruction>(V));
- });
- })))))
- return true;
+ // Common predicate for "phis, buildvectors, split nodes and small nodes
+ // with reuses" used by the two checks below. Cheap checks are evaluated
+ // before expensive Scalars walks.
+ auto IsBenignNode = [&](const TreeEntry &TE) {
+ if (TE.State == TreeEntry::SplitVectorize)
+ return true;
+ const bool IsGather = TE.isGather();
+ const bool HasState = TE.hasState();
+ if (HasState) {
+ const unsigned Op = TE.getOpcode();
+ if (Op == Instruction::PHI)
+ return true;
+ const unsigned ScalarsSize = TE.Scalars.size();
+ if (TE.Idx == 0 && ScalarsSize == 2 && Op == Instruction::ICmp &&
+ TreeSize > LimitTreeSize)
+ return true;
+ if (ScalarsSize == 2 &&
+ (!TE.ReuseShuffleIndices.empty() || !TE.ReorderIndices.empty() ||
+ TE.isAltShuffle()))
+ return true;
+ if (TE.hasCopyableElements() &&
+ static_cast<unsigned>(count_if(
+ TE.Scalars, IsaPred<PHINode, Constant>)) >= ScalarsSize / 2)
+ return true;
+ }
+ return IsGather && none_of(TE.Scalars, IsaPred<ExtractElementInst>);
+ };
- // If the tree contains only buildvector, 2 non-buildvectors (with root user
- // tree node) and other buildvectors, we can skip it.
- if (!ForReduction && SLPCostThreshold.getNumOccurrences() &&
- VectorizableTree.front()->State == TreeEntry::SplitVectorize &&
- VectorizableTree.size() >= Limit &&
- count_if(ArrayRef(VectorizableTree).drop_front(),
- [&](const std::unique_ptr<TreeEntry> &TE) {
- return !TE->isGather() && TE->UserTreeIndex.UserTE &&
- TE->UserTreeIndex.UserTE->Idx == 0;
- }) == 2)
- return true;
+ // If the tree contains only phis, buildvectors, split nodes and
+ // small nodes with reuses, we can skip it.
+ if (!ThresholdSet) {
+ SmallVector<const TreeEntry *> StoreLoadNodes;
+ unsigned NumGathers = 0;
+ if (all_of(VectorizableTree,
+ [&](const std::unique_ptr<TreeEntry> &TE) {
+ const bool IsGather = TE->isGather();
+ if (!IsGather && TE->hasState()) {
+ const unsigned Op = TE->getOpcode();
+ if (Op == Instruction::Load || Op == Instruction::Store) {
+ StoreLoadNodes.push_back(TE.get());
+ return true;
+ }
+ }
+ if (IsGather)
+ ++NumGathers;
+ return IsBenignNode(*TE);
+ }) &&
+ (StoreLoadNodes.empty() ||
+ (TreeSize > LimitTreeSize * StoreLoadNodes.size() &&
+ (NumGathers > 0 ||
+ none_of(StoreLoadNodes, [&](const TreeEntry *TE) {
+ return TE->getOpcode() == Instruction::Store ||
+ all_of(TE->Scalars, [&](Value *V) {
+ return !isa<LoadInst>(V) ||
+ areAllUsersVectorized(cast<Instruction>(V));
+ });
+ })))))
+ return true;
+ }
- // If the tree contains only vectorization of the phi node from the
- // buildvector - skip it.
- if (!ForReduction && SLPCostThreshold.getNumOccurrences() &&
- VectorizableTree.size() > 2 &&
- VectorizableTree.front()->State == TreeEntry::Vectorize &&
- VectorizableTree.front()->getOpcode() == Instruction::InsertElement &&
- VectorizableTree[1]->State == TreeEntry::Vectorize &&
- VectorizableTree[1]->getOpcode() == Instruction::PHI &&
- all_of(
- ArrayRef(VectorizableTree).drop_front(2),
- [&](const std::unique_ptr<TreeEntry> &TE) { return TE->isGather(); }))
- return true;
+ // If the tree contains only phis, buildvectors, split nodes and
+ // small nodes with reuses, we can skip it.
+ if (ThresholdNonNegative && TreeSize > LimitTreeSize) {
+ const TreeEntry *VectorNode = nullptr;
+ if (all_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
+ if (!TE->isGather() && TE->hasState() &&
+ TE->State != TreeEntry::SplitVectorize &&
+ TE->getOpcode() != Instruction::PHI) {
+ if (VectorNode)
+ return false;
+ VectorNode = TE.get();
+ return true;
+ }
+ return IsBenignNode(*TE);
+ }))
+ return true;
+ }
+
+ // If the tree contains only buildvector, 2 non-buildvectors (with root
+ // user tree node) and other buildvectors, we can skip it.
+ if (ThresholdSet && TreeSize >= Limit &&
+ Front.State == TreeEntry::SplitVectorize &&
+ count_if(ArrayRef(VectorizableTree).drop_front(),
+ [](const std::unique_ptr<TreeEntry> &TE) {
+ return !TE->isGather() && TE->UserTreeIndex.UserTE &&
+ TE->UserTreeIndex.UserTE->Idx == 0;
+ }) == 2)
+ return true;
+
+ // If the tree contains only vectorization of the phi node from the
+ // buildvector - skip it.
+ if (ThresholdSet && TreeSize > 2 && Front.State == TreeEntry::Vectorize &&
+ FrontOpcode == Instruction::InsertElement &&
+ VectorizableTree[1]->State == TreeEntry::Vectorize &&
+ VectorizableTree[1]->getOpcode() == Instruction::PHI &&
+ all_of(ArrayRef(VectorizableTree).drop_front(2),
+ [](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->isGather();
+ }))
+ return true;
+ }
// We can vectorize the tree if its size is greater than or equal to the
// minimum size specified by the MinTreeSize command line option.
- if (VectorizableTree.size() >= MinTreeSize)
+ if (TreeSize >= MinTreeSize)
return false;
// If we have a tiny tree (a tree whose size is less than MinTreeSize), we
@@ -17738,14 +17810,13 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
return false;
// Check if any of the gather node forms an insertelement buildvector
- // somewhere.
- bool IsAllowedSingleBVNode =
- VectorizableTree.size() > 1 ||
- (VectorizableTree.size() == 1 && VectorizableTree.front()->hasState() &&
- !VectorizableTree.front()->isAltShuffle() &&
- VectorizableTree.front()->getOpcode() != Instruction::PHI &&
- VectorizableTree.front()->getOpcode() != Instruction::GetElementPtr &&
- allSameBlock(VectorizableTree.front()->Scalars));
+ // somewhere. TreeSize >= 1 is guaranteed, so the multi-node case reduces to
+ // a simple TreeSize > 1 short-circuit.
+ const bool IsAllowedSingleBVNode =
+ TreeSize > 1 || (FrontHasState && !Front.isAltShuffle() &&
+ FrontOpcode != Instruction::PHI &&
+ FrontOpcode != Instruction::GetElementPtr &&
+ allSameBlock(Front.Scalars));
if (any_of(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
return TE->isGather() && all_of(TE->Scalars, [&](Value *V) {
return isa<ExtractElementInst, Constant>(V) ||
@@ -17756,19 +17827,18 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
}))
return false;
- if (VectorizableTree.back()->isGather() &&
- VectorizableTree.back()->hasState() &&
- VectorizableTree.back()->isAltShuffle() &&
- VectorizableTree.back()->getVectorFactor() > 2 &&
- allSameBlock(VectorizableTree.back()->Scalars) &&
- !VectorizableTree.back()->Scalars.front()->getType()->isVectorTy() &&
- TTI->getScalarizationOverhead(
- getWidenedType(VectorizableTree.back()->Scalars.front()->getType(),
- VectorizableTree.back()->getVectorFactor()),
- APInt::getAllOnes(VectorizableTree.back()->getVectorFactor()),
- /*Insert=*/true, /*Extract=*/false,
- TTI::TCK_RecipThroughput) > -SLPCostThreshold)
- return false;
+ const TreeEntry &Back = *VectorizableTree.back();
+ if (Back.isGather() && Back.hasState() && Back.isAltShuffle()) {
+ const unsigned BackVF = Back.getVectorFactor();
+ if (BackVF > 2 && allSameBlock(Back.Scalars) &&
+ !Back.Scalars.front()->getType()->isVectorTy() &&
+ TTI->getScalarizationOverhead(
+ getWidenedType(Back.Scalars.front()->getType(), BackVF),
+ APInt::getAllOnes(BackVF),
+ /*Insert=*/true, /*Extract=*/false,
+ TTI::TCK_RecipThroughput) > -SLPCostThreshold)
+ return false;
+ }
// Otherwise, we can't vectorize the tree. It is both tiny and not fully
// vectorizable.