aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@outlook.com>2024-04-29 09:57:37 -0400
committerGitHub <noreply@github.com>2024-04-29 09:57:37 -0400
commit37ae4ad0eef338776c7e2cffb3896153d43dcd90 (patch)
treea8fef19a00498ed857cee6337ba90067aa072aeb
parent040b5a1255dba79f6e6819ac4864256127f810a0 (diff)
downloadllvm-37ae4ad0eef338776c7e2cffb3896153d43dcd90.zip
llvm-37ae4ad0eef338776c7e2cffb3896153d43dcd90.tar.gz
llvm-37ae4ad0eef338776c7e2cffb3896153d43dcd90.tar.bz2
[SLP]Support minbitwidth analisys for buildvector nodes.
Metric: size..text Program size..text exp ref diff test-suite :: MultiSource/Benchmarks/mediabench/gsm/toast/toast.test 42906.00 42986.00 0.2% test-suite :: MultiSource/Benchmarks/MiBench/telecomm-gsm/telecomm-gsm.test 42909.00 42989.00 0.2% test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 664581.00 664661.00 0.0% test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 664581.00 664661.00 0.0% Less is better. Replaces `buildvector <p x in> + trunc <p x in> to <p x im>` sequences to `buildvector <p x im> of { trunc in to im }` scalars, which is free in most cases, results in better code. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/88504
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp328
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll7
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll3
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll7
-rw-r--r--llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll6
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll3
6 files changed, 217 insertions, 137 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2facd03..e3a1b0d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2487,12 +2487,12 @@ private:
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
template <typename BVTy, typename ResTy, typename... Args>
- ResTy processBuildVector(const TreeEntry *E, Args &...Params);
+ ResTy processBuildVector(const TreeEntry *E, Type *ScalarTy, Args &...Params);
/// Create a new vector from a list of scalar values. Produces a sequence
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
- Value *createBuildVector(const TreeEntry *E);
+ Value *createBuildVector(const TreeEntry *E, Type *ScalarTy);
/// Returns the instruction in the bundle, which can be used as a base point
/// for scheduling. Usually it is the last instruction in the bundle, except
@@ -2556,7 +2556,8 @@ private:
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
/// \param ForPoisonSrc true if initial vector is poison, false otherwise.
- InstructionCost getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc) const;
+ InstructionCost getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
+ Type *ScalarTy) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
@@ -2564,7 +2565,7 @@ private:
/// \returns a vector from a collection of scalars in \p VL. if \p Root is not
/// specified, the starting vector value is poison.
- Value *gather(ArrayRef<Value *> VL, Value *Root);
+ Value *gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy);
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
@@ -7876,6 +7877,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
bool IsFinalized = false;
SmallVector<int> CommonMask;
SmallVector<PointerUnion<Value *, const TreeEntry *>, 2> InVectors;
+ Type *ScalarTy = nullptr;
const TargetTransformInfo &TTI;
InstructionCost Cost = 0;
SmallDenseSet<Value *> VectorizedVals;
@@ -7905,13 +7907,13 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
if ((!Root && allConstant(VL)) || all_of(VL, IsaPred<UndefValue>))
return TTI::TCC_Free;
- auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
+ auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
InstructionCost GatherCost = 0;
SmallVector<Value *> Gathers(VL.begin(), VL.end());
// Improve gather cost for gather of loads, if we can group some of the
// loads into vector loads.
InstructionsState S = getSameOpcode(VL, *R.TLI);
- const unsigned Sz = R.DL->getTypeSizeInBits(VL.front()->getType());
+ const unsigned Sz = R.DL->getTypeSizeInBits(ScalarTy);
unsigned MinVF = R.getMinVF(2 * Sz);
if (VL.size() > 2 &&
((S.getOpcode() == Instruction::Load && !S.isAltShuffle()) ||
@@ -7925,7 +7927,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}))) &&
!all_of(Gathers, [&](Value *V) { return R.getTreeEntry(V); }) &&
!isSplat(Gathers)) {
- InstructionCost BaseCost = R.getGatherCost(Gathers, !Root);
+ InstructionCost BaseCost = R.getGatherCost(Gathers, !Root, ScalarTy);
SetVector<Value *> VectorizedLoads;
SmallVector<std::pair<unsigned, LoadsState>> VectorizedStarts;
SmallVector<unsigned> ScatterVectorized;
@@ -8053,7 +8055,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
VecTy, Mask, CostKind);
}
} else {
- GatherCost += R.getGatherCost(PointerOps, /*ForPoisonSrc=*/true);
+ GatherCost += R.getGatherCost(PointerOps, /*ForPoisonSrc=*/true,
+ PointerOps.front()->getType());
}
}
if (NeedInsertSubvectorAnalysis) {
@@ -8087,18 +8090,19 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
transform(VL, ShuffleMask.begin(), [](Value *V) {
return isa<PoisonValue>(V) ? PoisonMaskElem : 0;
});
- InstructionCost InsertCost = TTI.getVectorInstrCost(
- Instruction::InsertElement, VecTy, CostKind, 0,
- PoisonValue::get(VecTy), *It);
- return InsertCost +
- TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
- ShuffleMask, CostKind, /*Index=*/0,
- /*SubTp=*/nullptr, /*Args=*/*It);
+ InstructionCost InsertCost =
+ TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind, 0,
+ PoisonValue::get(VecTy), *It);
+ return InsertCost + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
+ VecTy, ShuffleMask, CostKind,
+ /*Index=*/0, /*SubTp=*/nullptr,
+ /*Args=*/*It);
}
return GatherCost +
(all_of(Gathers, IsaPred<UndefValue>)
? TTI::TCC_Free
- : R.getGatherCost(Gathers, !Root && VL.equals(Gathers)));
+ : R.getGatherCost(Gathers, !Root && VL.equals(Gathers),
+ ScalarTy));
};
/// Compute the cost of creating a vector containing the extracted values from
@@ -8118,8 +8122,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return Sz;
return std::max(Sz, VecTy->getNumElements());
});
- unsigned NumSrcRegs = TTI.getNumberOfParts(
- FixedVectorType::get(VL.front()->getType(), NumElts));
+ unsigned NumSrcRegs =
+ TTI.getNumberOfParts(FixedVectorType::get(ScalarTy, NumElts));
if (NumSrcRegs == 0)
NumSrcRegs = 1;
// FIXME: this must be moved to TTI for better estimation.
@@ -8165,17 +8169,16 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
std::optional<TTI::ShuffleKind> RegShuffleKind =
CheckPerRegistersShuffle(SubMask);
if (!RegShuffleKind) {
- Cost += ::getShuffleCost(
- TTI, *ShuffleKinds[Part],
- FixedVectorType::get(VL.front()->getType(), NumElts), MaskSlice);
+ Cost += ::getShuffleCost(TTI, *ShuffleKinds[Part],
+ FixedVectorType::get(ScalarTy, NumElts),
+ MaskSlice);
continue;
}
if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
!ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
- Cost += ::getShuffleCost(
- TTI, *RegShuffleKind,
- FixedVectorType::get(VL.front()->getType(), EltsPerVector),
- SubMask);
+ Cost += ::getShuffleCost(TTI, *RegShuffleKind,
+ FixedVectorType::get(ScalarTy, EltsPerVector),
+ SubMask);
}
}
return Cost;
@@ -8292,6 +8295,48 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
SmallVector<int> CommonMask(Mask.begin(), Mask.end());
Value *V1 = P1.dyn_cast<Value *>(), *V2 = P2.dyn_cast<Value *>();
unsigned CommonVF = Mask.size();
+ InstructionCost ExtraCost = 0;
+ auto GetNodeMinBWAffectedCost = [&](const TreeEntry &E,
+ unsigned VF) -> InstructionCost {
+ if (E.State == TreeEntry::NeedToGather && allConstant(E.Scalars))
+ return TTI::TCC_Free;
+ Type *EScalarTy = E.Scalars.front()->getType();
+ bool IsSigned = true;
+ if (auto It = R.MinBWs.find(&E); It != R.MinBWs.end()) {
+ EScalarTy = IntegerType::get(EScalarTy->getContext(), It->second.first);
+ IsSigned = It->second.second;
+ }
+ if (EScalarTy != ScalarTy) {
+ unsigned CastOpcode = Instruction::Trunc;
+ unsigned DstSz = R.DL->getTypeSizeInBits(ScalarTy);
+ unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
+ if (DstSz > SrcSz)
+ CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
+ return TTI.getCastInstrCost(CastOpcode,
+ FixedVectorType::get(ScalarTy, VF),
+ FixedVectorType::get(EScalarTy, VF),
+ TTI::CastContextHint::None, CostKind);
+ }
+ return TTI::TCC_Free;
+ };
+ auto GetValueMinBWAffectedCost = [&](const Value *V) -> InstructionCost {
+ if (isa<Constant>(V))
+ return TTI::TCC_Free;
+ auto *VecTy = cast<VectorType>(V->getType());
+ Type *EScalarTy = VecTy->getElementType();
+ if (EScalarTy != ScalarTy) {
+ bool IsSigned = !isKnownNonNegative(V, SimplifyQuery(*R.DL));
+ unsigned CastOpcode = Instruction::Trunc;
+ unsigned DstSz = R.DL->getTypeSizeInBits(ScalarTy);
+ unsigned SrcSz = R.DL->getTypeSizeInBits(EScalarTy);
+ if (DstSz > SrcSz)
+ CastOpcode = IsSigned ? Instruction::SExt : Instruction::ZExt;
+ return TTI.getCastInstrCost(
+ CastOpcode, VectorType::get(ScalarTy, VecTy->getElementCount()),
+ VecTy, TTI::CastContextHint::None, CostKind);
+ }
+ return TTI::TCC_Free;
+ };
if (!V1 && !V2 && !P2.isNull()) {
// Shuffle 2 entry nodes.
const TreeEntry *E = P1.get<const TreeEntry *>();
@@ -8318,11 +8363,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
}
CommonVF = E->Scalars.size();
+ ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF) +
+ GetNodeMinBWAffectedCost(*E2, CommonVF);
+ } else {
+ ExtraCost += GetNodeMinBWAffectedCost(*E, E->getVectorFactor()) +
+ GetNodeMinBWAffectedCost(*E2, E2->getVectorFactor());
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL, FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
} else if (!V1 && P2.isNull()) {
// Shuffle single entry node.
const TreeEntry *E = P1.get<const TreeEntry *>();
@@ -8341,8 +8389,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
CommonVF = E->Scalars.size();
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E->Scalars.front()->getType(), CommonVF));
+ ExtraCost += GetNodeMinBWAffectedCost(*E, CommonVF);
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
// Not identity/broadcast? Try to see if the original vector is better.
if (!E->ReorderIndices.empty() && CommonVF == E->ReorderIndices.size() &&
CommonVF == CommonMask.size() &&
@@ -8359,6 +8407,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
} else if (V1 && P2.isNull()) {
// Shuffle single vector.
+ ExtraCost += GetValueMinBWAffectedCost(V1);
CommonVF = cast<FixedVectorType>(V1->getType())->getNumElements();
assert(
all_of(Mask,
@@ -8385,11 +8434,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
CommonVF = VF;
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL,
- FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
+ ExtraCost += GetValueMinBWAffectedCost(V1);
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ ExtraCost += GetNodeMinBWAffectedCost(
+ *E2, std::min(CommonVF, E2->getVectorFactor()));
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
} else if (!V1 && V2) {
// Shuffle vector and tree node.
unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
@@ -8413,11 +8462,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
}
CommonVF = VF;
}
- V1 = Constant::getNullValue(
- FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL,
- FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
+ ExtraCost += GetNodeMinBWAffectedCost(
+ *E1, std::min(CommonVF, E1->getVectorFactor()));
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ ExtraCost += GetValueMinBWAffectedCost(V2);
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
} else {
assert(V1 && V2 && "Expected both vectors.");
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
@@ -8428,30 +8477,33 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
return Idx < 2 * static_cast<int>(CommonVF);
}) &&
"All elements in mask must be less than 2 * CommonVF.");
+ ExtraCost +=
+ GetValueMinBWAffectedCost(V1) + GetValueMinBWAffectedCost(V2);
if (V1->getType() != V2->getType()) {
- V1 = Constant::getNullValue(FixedVectorType::get(
- cast<FixedVectorType>(V1->getType())->getElementType(), CommonVF));
- V2 = getAllOnesValue(
- *R.DL, FixedVectorType::get(
- cast<FixedVectorType>(V1->getType())->getElementType(),
- CommonVF));
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
+ } else {
+ if (cast<VectorType>(V1->getType())->getElementType() != ScalarTy)
+ V1 = Constant::getNullValue(FixedVectorType::get(ScalarTy, CommonVF));
+ if (cast<VectorType>(V2->getType())->getElementType() != ScalarTy)
+ V2 = getAllOnesValue(*R.DL, FixedVectorType::get(ScalarTy, CommonVF));
}
}
- InVectors.front() = Constant::getNullValue(FixedVectorType::get(
- cast<FixedVectorType>(V1->getType())->getElementType(),
- CommonMask.size()));
+ InVectors.front() = Constant::getNullValue(
+ FixedVectorType::get(ScalarTy, CommonMask.size()));
if (InVectors.size() == 2)
InVectors.pop_back();
- return BaseShuffleAnalysis::createShuffle<InstructionCost>(
- V1, V2, CommonMask, Builder);
+ return ExtraCost + BaseShuffleAnalysis::createShuffle<InstructionCost>(
+ V1, V2, CommonMask, Builder);
}
public:
- ShuffleCostEstimator(TargetTransformInfo &TTI,
+ ShuffleCostEstimator(Type *ScalarTy, TargetTransformInfo &TTI,
ArrayRef<Value *> VectorizedVals, BoUpSLP &R,
SmallPtrSetImpl<Value *> &CheckedExtracts)
- : TTI(TTI), VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()),
- R(R), CheckedExtracts(CheckedExtracts) {}
+ : ScalarTy(ScalarTy), TTI(TTI),
+ VectorizedVals(VectorizedVals.begin(), VectorizedVals.end()), R(R),
+ CheckedExtracts(CheckedExtracts) {}
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
unsigned NumParts, bool &UseVecBaseAsInput) {
@@ -8547,7 +8599,7 @@ public:
if (NumParts != 1 && UniqueBases.size() != 1) {
UseVecBaseAsInput = true;
VecBase = Constant::getNullValue(
- FixedVectorType::get(VL.front()->getType(), CommonMask.size()));
+ FixedVectorType::get(ScalarTy, CommonMask.size()));
}
return VecBase;
}
@@ -8575,8 +8627,7 @@ public:
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
- auto *MaskVecTy =
- FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
+ auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
@@ -8593,8 +8644,7 @@ public:
return;
}
assert(!CommonMask.empty() && "Expected non-empty common mask.");
- auto *MaskVecTy =
- FixedVectorType::get(E1.Scalars.front()->getType(), Mask.size());
+ auto *MaskVecTy = FixedVectorType::get(ScalarTy, Mask.size());
unsigned NumParts = TTI.getNumberOfParts(MaskVecTy);
if (NumParts == 0 || NumParts >= Mask.size())
NumParts = 1;
@@ -8694,7 +8744,7 @@ public:
return ConstantVector::getSplat(
ElementCount::getFixed(
cast<FixedVectorType>(Root->getType())->getNumElements()),
- getAllOnesValue(*R.DL, VL.front()->getType()));
+ getAllOnesValue(*R.DL, ScalarTy));
}
InstructionCost createFreeze(InstructionCost Cost) { return Cost; }
/// Finalize emission of the shuffles.
@@ -8840,7 +8890,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
return processBuildVector<ShuffleCostEstimator, InstructionCost>(
- E, *TTI, VectorizedVals, *this, CheckedExtracts);
+ E, ScalarTy, *TTI, VectorizedVals, *this, CheckedExtracts);
}
InstructionCost CommonCost = 0;
SmallVector<int> Mask;
@@ -10880,12 +10930,8 @@ BoUpSLP::isGatherShuffledEntry(
return Res;
}
-InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
- bool ForPoisonSrc) const {
- // Find the type of the operands in VL.
- Type *ScalarTy = VL[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
+InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
+ Type *ScalarTy) const {
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
bool DuplicateNonConst = false;
// Find the cost of inserting/extracting values from the vector.
@@ -10896,6 +10942,11 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost Cost;
auto EstimateInsertCost = [&](unsigned I, Value *V) {
+ if (V->getType() != ScalarTy) {
+ Cost += TTI->getCastInstrCost(Instruction::Trunc, ScalarTy, V->getType(),
+ TTI::CastContextHint::None, CostKind);
+ V = nullptr;
+ }
if (!ForPoisonSrc)
Cost +=
TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
@@ -11123,7 +11174,7 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
-Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
+Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
// List of instructions/lanes from current block and/or the blocks which are
// part of the current loop. These instructions will be inserted at the end to
// make it possible to optimize loops and hoist invariant instructions out of
@@ -11149,14 +11200,11 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos,
Type *Ty) {
Value *Scalar = V;
- if (cast<VectorType>(Vec->getType())->getElementType() != Ty) {
- assert(V->getType()->isIntegerTy() && Ty->isIntegerTy() &&
+ if (Scalar->getType() != Ty) {
+ assert(Scalar->getType()->isIntegerTy() && Ty->isIntegerTy() &&
"Expected integer types only.");
- Vec = Builder.CreateIntCast(
- Vec,
- VectorType::get(Ty,
- cast<VectorType>(Vec->getType())->getElementCount()),
- !isKnownNonNegative(Vec, SimplifyQuery(*DL)));
+ Scalar = Builder.CreateIntCast(
+ Scalar, Ty, !isKnownNonNegative(Scalar, SimplifyQuery(*DL)));
}
Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos));
@@ -11184,10 +11232,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
}
return Vec;
};
- Value *Val0 =
- isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
- Type *ScalarTy = Val0->getType();
- FixedVectorType *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
Value *Vec = Root ? Root : PoisonValue::get(VecTy);
SmallVector<int> NonConsts;
// Insert constant values at first.
@@ -11266,6 +11311,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
/// resulting shuffle and the second operand sets to be the newly added
/// operand. The \p CommonMask is transformed in the proper way after that.
SmallVector<Value *, 2> InVectors;
+ Type *ScalarTy = nullptr;
IRBuilderBase &Builder;
BoUpSLP &R;
@@ -11376,9 +11422,20 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
CommonMask[Idx] = Idx;
}
+ /// Cast value \p V to the vector type with the same number of elements, but
+ /// the base type \p ScalarTy.
+ Value *castToScalarTyElem(Value *V) {
+ auto *VecTy = cast<VectorType>(V->getType());
+ if (VecTy->getElementType() == ScalarTy)
+ return V;
+ return Builder.CreateIntCast(
+ V, VectorType::get(ScalarTy, VecTy->getElementCount()),
+ !isKnownNonNegative(V, SimplifyQuery(*R.DL)));
+ }
+
public:
- ShuffleInstructionBuilder(IRBuilderBase &Builder, BoUpSLP &R)
- : Builder(Builder), R(R) {}
+ ShuffleInstructionBuilder(Type *ScalarTy, IRBuilderBase &Builder, BoUpSLP &R)
+ : ScalarTy(ScalarTy), Builder(Builder), R(R) {}
/// Adjusts extractelements after reusing them.
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
@@ -11417,8 +11474,10 @@ public:
continue;
R.eraseInstruction(EI);
}
- if (NumParts == 1 || UniqueBases.size() == 1)
+ if (NumParts == 1 || UniqueBases.size() == 1) {
+ VecBase = castToScalarTyElem(VecBase);
return VecBase;
+ }
UseVecBaseAsInput = true;
auto TransformToIdentity = [](MutableArrayRef<int> Mask) {
for (auto [I, Idx] : enumerate(Mask))
@@ -11455,6 +11514,7 @@ public:
"Expected vectors of the same size.");
PrevSize = Size;
#endif // NDEBUG
+ VecOp = castToScalarTyElem(VecOp);
Bases[SubMask[I] < Size ? 0 : 1] = VecOp;
}
if (!Bases.front())
@@ -11510,10 +11570,10 @@ public:
return std::nullopt;
// Postpone gather emission, will be emitted after the end of the
// process to keep correct order.
- auto *VecTy = FixedVectorType::get(E->Scalars.front()->getType(),
- E->getVectorFactor());
+ auto *ResVecTy = FixedVectorType::get(ScalarTy, E->getVectorFactor());
return Builder.CreateAlignedLoad(
- VecTy, PoisonValue::get(PointerType::getUnqual(VecTy->getContext())),
+ ResVecTy,
+ PoisonValue::get(PointerType::getUnqual(ScalarTy->getContext())),
MaybeAlign());
}
/// Adds 2 input vectors (in form of tree entries) and the mask for their
@@ -11529,6 +11589,8 @@ public:
/// Adds 2 input vectors and the mask for their shuffling.
void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
assert(V1 && V2 && !Mask.empty() && "Expected non-empty input vectors.");
+ V1 = castToScalarTyElem(V1);
+ V2 = castToScalarTyElem(V2);
if (InVectors.empty()) {
InVectors.push_back(V1);
InVectors.push_back(V2);
@@ -11556,6 +11618,7 @@ public:
}
/// Adds another one input vector and the mask for the shuffling.
void add(Value *V1, ArrayRef<int> Mask, bool = false) {
+ V1 = castToScalarTyElem(V1);
if (InVectors.empty()) {
if (!isa<FixedVectorType>(V1->getType())) {
V1 = createShuffle(V1, nullptr, CommonMask);
@@ -11619,7 +11682,7 @@ public:
}
Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
Value *Root = nullptr) {
- return R.gather(VL, Root);
+ return R.gather(VL, Root, ScalarTy);
}
Value *createFreeze(Value *V) { return Builder.CreateFreeze(V); }
/// Finalize emission of the shuffles.
@@ -11719,7 +11782,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
}
if (IsSameVE) {
auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
- ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
+ ShuffleInstructionBuilder ShuffleBuilder(
+ cast<VectorType>(V->getType())->getElementType(), Builder, *this);
ShuffleBuilder.add(V, Mask);
return ShuffleBuilder.finalize(std::nullopt);
};
@@ -11794,7 +11858,8 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
}
template <typename BVTy, typename ResTy, typename... Args>
-ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
+ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
+ Args &...Params) {
assert(E->State == TreeEntry::NeedToGather && "Expected gather node.");
unsigned VF = E->getVectorFactor();
@@ -11842,7 +11907,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
}
return true;
};
- BVTy ShuffleBuilder(Params...);
+ BVTy ShuffleBuilder(ScalarTy, Params...);
ResTy Res = ResTy();
SmallVector<int> Mask;
SmallVector<int> ExtractMask(GatheredScalars.size(), PoisonMaskElem);
@@ -11851,7 +11916,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
bool UseVecBaseAsInput = false;
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles;
SmallVector<SmallVector<const TreeEntry *>> Entries;
- Type *ScalarTy = GatheredScalars.front()->getType();
+ Type *OrigScalarTy = GatheredScalars.front()->getType();
auto *VecTy = FixedVectorType::get(ScalarTy, GatheredScalars.size());
unsigned NumParts = TTI->getNumberOfParts(VecTy);
if (NumParts == 0 || NumParts >= GatheredScalars.size())
@@ -11886,7 +11951,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
GatheredScalars.size() != VF) {
Resized = true;
GatheredScalars.append(VF - GatheredScalars.size(),
- PoisonValue::get(ScalarTy));
+ PoisonValue::get(OrigScalarTy));
}
}
}
@@ -11946,12 +12011,12 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
});
}))
GatheredScalars.append(VF - GatheredScalars.size(),
- PoisonValue::get(ScalarTy));
+ PoisonValue::get(OrigScalarTy));
}
// Remove shuffled elements from list of gathers.
for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
if (Mask[I] != PoisonMaskElem)
- GatheredScalars[I] = PoisonValue::get(ScalarTy);
+ GatheredScalars[I] = PoisonValue::get(OrigScalarTy);
}
}
}
@@ -11962,7 +12027,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
// such sequences.
bool IsSplat = IsRootPoison && isSplat(Scalars) &&
(Scalars.size() > 2 || Scalars.front() == Scalars.back());
- Scalars.append(VF - Scalars.size(), PoisonValue::get(ScalarTy));
+ Scalars.append(VF - Scalars.size(), PoisonValue::get(OrigScalarTy));
SmallVector<int> UndefPos;
DenseMap<Value *, unsigned> UniquePositions;
// Gather unique non-const values and all constant values.
@@ -11984,7 +12049,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
++NumNonConsts;
SinglePos = I;
Value *OrigV = V;
- Scalars[I] = PoisonValue::get(ScalarTy);
+ Scalars[I] = PoisonValue::get(OrigScalarTy);
if (IsSplat) {
Scalars.front() = OrigV;
ReuseMask[I] = 0;
@@ -12000,7 +12065,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
ReuseMask.assign(VF, PoisonMaskElem);
std::swap(Scalars.front(), Scalars[SinglePos]);
if (!UndefPos.empty() && UndefPos.front() == 0)
- Scalars.front() = UndefValue::get(ScalarTy);
+ Scalars.front() = UndefValue::get(OrigScalarTy);
}
ReuseMask[SinglePos] = SinglePos;
} else if (!UndefPos.empty() && IsSplat) {
@@ -12030,7 +12095,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
// Replace the undef by the poison, in the mask it is replaced by
// non-poisoned scalar already.
if (I != Pos)
- Scalars[I] = PoisonValue::get(ScalarTy);
+ Scalars[I] = PoisonValue::get(OrigScalarTy);
}
} else {
// Replace undefs by the poisons, emit broadcast and then emit
@@ -12038,7 +12103,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
for (int I : UndefPos) {
ReuseMask[I] = PoisonMaskElem;
if (isa<UndefValue>(Scalars[I]))
- Scalars[I] = PoisonValue::get(ScalarTy);
+ Scalars[I] = PoisonValue::get(OrigScalarTy);
}
NeedFreeze = true;
}
@@ -12093,9 +12158,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
} else {
IsUsedInExpr = false;
- ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
- ScalarTy, GatheredScalars.size())),
- ExtractMask, /*ForExtracts=*/true);
+ ShuffleBuilder.add(PoisonValue::get(VecTy), ExtractMask,
+ /*ForExtracts=*/true);
}
}
if (!GatherShuffles.empty()) {
@@ -12176,9 +12240,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
// contains only constant to build final vector and then shuffle.
for (int I = 0, Sz = GatheredScalars.size(); I < Sz; ++I) {
if (EnoughConstsForShuffle && isa<Constant>(GatheredScalars[I]))
- NonConstants[I] = PoisonValue::get(ScalarTy);
+ NonConstants[I] = PoisonValue::get(OrigScalarTy);
else
- GatheredScalars[I] = PoisonValue::get(ScalarTy);
+ GatheredScalars[I] = PoisonValue::get(OrigScalarTy);
}
// Generate constants for final shuffle and build a mask for them.
if (!all_of(GatheredScalars, IsaPred<PoisonValue>)) {
@@ -12224,9 +12288,9 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
return Res;
}
-Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
- return processBuildVector<ShuffleInstructionBuilder, Value *>(E, Builder,
- *this);
+Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy) {
+ return processBuildVector<ShuffleInstructionBuilder, Value *>(E, ScalarTy,
+ Builder, *this);
}
Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
@@ -12239,18 +12303,28 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
return E->VectorizedValue;
}
+ Value *V = E->Scalars.front();
+ Type *ScalarTy = V->getType();
+ if (auto *Store = dyn_cast<StoreInst>(V))
+ ScalarTy = Store->getValueOperand()->getType();
+ else if (auto *IE = dyn_cast<InsertElementInst>(V))
+ ScalarTy = IE->getOperand(1)->getType();
+ auto It = MinBWs.find(E);
+ if (It != MinBWs.end())
+ ScalarTy = IntegerType::get(F->getContext(), It->second.first);
+ auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
if (E->State == TreeEntry::NeedToGather) {
// Set insert point for non-reduction initial nodes.
if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
setInsertPointAfterBundle(E);
- Value *Vec = createBuildVector(E);
+ Value *Vec = createBuildVector(E, ScalarTy);
E->VectorizedValue = Vec;
return Vec;
}
bool IsReverseOrder = isReverseOrder(E->ReorderIndices);
auto FinalShuffle = [&](Value *V, const TreeEntry *E, VectorType *VecTy) {
- ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
+ ShuffleInstructionBuilder ShuffleBuilder(ScalarTy, Builder, *this);
if (E->getOpcode() == Instruction::Store) {
ArrayRef<int> Mask =
ArrayRef(reinterpret_cast<const int *>(E->ReorderIndices.begin()),
@@ -12271,14 +12345,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
Instruction *VL0 = E->getMainOp();
- Type *ScalarTy = VL0->getType();
- if (auto *Store = dyn_cast<StoreInst>(VL0))
- ScalarTy = Store->getValueOperand()->getType();
- else if (auto *IE = dyn_cast<InsertElementInst>(VL0))
- ScalarTy = IE->getOperand(1)->getType();
- auto It = MinBWs.find(E);
- if (It != MinBWs.end())
- ScalarTy = IntegerType::get(F->getContext(), It->second.first);
auto GetOperandSignedness = [&](unsigned Idx) {
const TreeEntry *OpE = getOperandEntry(E, Idx);
bool IsSigned = false;
@@ -12291,7 +12357,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
});
return IsSigned;
};
- auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
switch (ShuffleOrOp) {
case Instruction::PHI: {
assert((E->ReorderIndices.empty() || !E->ReuseShuffleIndices.empty() ||
@@ -13545,7 +13610,8 @@ Value *BoUpSLP::vectorizeTree(
else
CombinedMask2[I] = Mask[I] - VF;
}
- ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
+ ShuffleInstructionBuilder ShuffleBuilder(
+ cast<VectorType>(V1->getType())->getElementType(), Builder, *this);
ShuffleBuilder.add(V1, CombinedMask1);
if (V2)
ShuffleBuilder.add(V2, CombinedMask2);
@@ -14580,13 +14646,27 @@ bool BoUpSLP::collectValuesToDemote(
return false;
bool Res = all_of(
E.Scalars, std::bind(IsPotentiallyTruncated, _1, std::ref(BitWidth)));
- // Gather demoted constant operands.
- if (Res && E.State == TreeEntry::NeedToGather &&
- all_of(E.Scalars, IsaPred<Constant>))
- ToDemote.push_back(E.Idx);
+ // Demote gathers.
+ if (Res && E.State == TreeEntry::NeedToGather) {
+ // Check possible extractelement instructions bases and final vector
+ // length.
+ SmallPtrSet<Value *, 4> UniqueBases;
+ for (Value *V : E.Scalars) {
+ auto *EE = dyn_cast<ExtractElementInst>(V);
+ if (!EE)
+ continue;
+ UniqueBases.insert(EE->getVectorOperand());
+ }
+ const unsigned VF = E.Scalars.size();
+ Type *OrigScalarTy = E.Scalars.front()->getType();
+ if (UniqueBases.size() <= 2 ||
+ TTI->getNumberOfParts(FixedVectorType::get(OrigScalarTy, VF)) ==
+ TTI->getNumberOfParts(FixedVectorType::get(
+ IntegerType::get(OrigScalarTy->getContext(), BitWidth), VF)))
+ ToDemote.push_back(E.Idx);
+ }
return Res;
};
- // TODO: improve handling of gathered values and others.
if (E.State == TreeEntry::NeedToGather || !Visited.insert(&E).second ||
any_of(E.Scalars, [&](Value *V) {
return all_of(V->users(), [&](User *U) {
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll
index 6907724..3771ec4 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-buildvector-with-minbitwidth-user.ll
@@ -5,12 +5,7 @@ define void @h() {
; CHECK-LABEL: define void @h() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 0, i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i32> [[TMP0]] to <8 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i1> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i1> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i1> [[TMP4]] to <8 x i16>
-; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
index d51ef0b..76bb882 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-with-minbith-user.ll
@@ -5,7 +5,8 @@ define void @h() {
; CHECK-LABEL: define void @h() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT: [[TMP0:%.*]] = trunc <8 x i32> zeroinitializer to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 0 to i1
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 poison, i1 false, i1 false, i1 false>, i1 [[TMP6]], i32 4
; CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i1> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i1> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll
index 6404cf4..2ab6e91 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/user-node-not-in-bitwidths.ll
@@ -5,7 +5,12 @@ define void @h() {
; CHECK-LABEL: define void @h() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr null, i64 16
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[ARRAYIDX2]], align 2
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 0 to i1
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 poison, i1 false, i1 false, i1 false>, i1 [[TMP0]], i32 4
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i1> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i1> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i1> [[TMP3]] to <8 x i16>
+; CHECK-NEXT: store <8 x i16> [[TMP4]], ptr [[ARRAYIDX2]], align 2
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
index 7b4e2b0..1bb87bf 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll
@@ -7,9 +7,9 @@ define void @test(ptr %a, i8 %0, i16 %b.promoted.i) {
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP0]] to i128
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[B_PROMOTED_I]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i128> poison, i128 [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i128> [[TMP5]], <4 x i128> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i128> [[TMP6]] to <4 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP2]] to i16
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i16> poison, i16 [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]])
; CHECK-NEXT: [[TMP11:%.*]] = zext i16 [[TMP9]] to i64
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll
index 668d3c3..0ab5627 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll
@@ -16,8 +16,7 @@ define void @test() {
; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP16]] to <4 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP15]] to <4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]])