aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 143
-rw-r--r-- llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll | 14
-rw-r--r-- llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll | 171
-rw-r--r-- llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll | 40
4 files changed, 162 insertions, 206 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 5b9fe1c..7fa1b433 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1703,9 +1703,44 @@ generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
return NItem;
}
+/// Return true if Item is whole same-typed vectors laid end to end (each slice reads lanes 0..NumElts-1 of one value) and TTI reports a zero-cost concat.
+static bool isFreeConcat(ArrayRef<InstLane> Item,
+ const TargetTransformInfo &TTI) {
+ auto *Ty = cast<FixedVectorType>(Item.front().first->get()->getType());
+ unsigned NumElts = Ty->getNumElements();
+ if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
+ return false;
+
+ // Only accept a zero-cost two-source concat of Ty vectors, which usually
+ // means that the wider type will be split during legalization.
+ SmallVector<int, 16> ConcatMask(NumElts * 2);
+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+ if (TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, ConcatMask,
+ TTI::TCK_RecipThroughput) != 0)
+ return false;
+
+ unsigned NumSlices = Item.size() / NumElts;
+ // Currently we generate a tree of shuffles for the concats, which limits us
+ // to a power-of-2 number of slices.
+ if (!isPowerOf2_32(NumSlices))
+ return false;
+ for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
+ Use *SliceV = Item[Slice * NumElts].first;
+ if (!SliceV || SliceV->get()->getType() != Ty)
+ return false;
+ for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
+ auto [V, Lane] = Item[Slice * NumElts + Elt];
+ if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
+ return false;
+ }
+ }
+ return true;
+}
+
static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
const SmallPtrSet<Use *, 4> &IdentityLeafs,
const SmallPtrSet<Use *, 4> &SplatLeafs,
+ const SmallPtrSet<Use *, 4> &ConcatLeafs,
IRBuilder<> &Builder) {
auto [FrontU, FrontLane] = Item.front();
@@ -1713,13 +1748,28 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
return FrontU->get();
}
if (SplatLeafs.contains(FrontU)) {
- if (auto *ILI = dyn_cast<Instruction>(FrontU))
- Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
- else if (auto *Arg = dyn_cast<Argument>(FrontU))
- Builder.SetInsertPointPastAllocas(Arg->getParent());
SmallVector<int, 16> Mask(Ty->getNumElements(), FrontLane);
return Builder.CreateShuffleVector(FrontU->get(), Mask);
}
+ if (ConcatLeafs.contains(FrontU)) {
+ unsigned NumElts =
+ cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
+ SmallVector<Value *> Values(Item.size() / NumElts, nullptr);
+ for (unsigned S = 0; S < Values.size(); ++S)
+ Values[S] = Item[S * NumElts].first->get();
+
+ while (Values.size() > 1) {
+ NumElts *= 2;
+ SmallVector<int, 16> Mask(NumElts, 0);
+ std::iota(Mask.begin(), Mask.end(), 0);
+ SmallVector<Value *> NewValues(Values.size() / 2, nullptr);
+ for (unsigned S = 0; S < NewValues.size(); ++S)
+ NewValues[S] =
+ Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
+ Values = NewValues;
+ }
+ return Values[0];
+ }
auto *I = cast<Instruction>(FrontU->get());
auto *II = dyn_cast<IntrinsicInst>(I);
@@ -1730,8 +1780,9 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
Ops[Idx] = II->getOperand(Idx);
continue;
}
- Ops[Idx] = generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx),
- Ty, IdentityLeafs, SplatLeafs, Builder);
+ Ops[Idx] =
+ generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx), Ty,
+ IdentityLeafs, SplatLeafs, ConcatLeafs, Builder);
}
SmallVector<Value *, 8> ValueList;
@@ -1739,7 +1790,6 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
if (Lane.first)
ValueList.push_back(Lane.first->get());
- Builder.SetInsertPoint(I);
Type *DstTy =
FixedVectorType::get(I->getType()->getScalarType(), Ty->getNumElements());
if (auto *BI = dyn_cast<BinaryOperator>(I)) {
@@ -1790,7 +1840,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
SmallVector<SmallVector<InstLane>> Worklist;
Worklist.push_back(Start);
- SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs;
+ SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
unsigned NumVisited = 0;
while (!Worklist.empty()) {
@@ -1839,7 +1889,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
// We need each element to be the same type of value, and check that each
// element has a single use.
- if (!all_of(drop_begin(Item), [Item](InstLane IL) {
+ if (all_of(drop_begin(Item), [Item](InstLane IL) {
Value *FrontV = Item.front().first->get();
if (!IL.first)
return true;
@@ -1860,40 +1910,49 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
return !II || (isa<IntrinsicInst>(FrontV) &&
II->getIntrinsicID() ==
cast<IntrinsicInst>(FrontV)->getIntrinsicID());
- }))
- return false;
-
- // Check the operator is one that we support. We exclude div/rem in case
- // they hit UB from poison lanes.
- if ((isa<BinaryOperator>(FrontU) &&
- !cast<BinaryOperator>(FrontU)->isIntDivRem()) ||
- isa<CmpInst>(FrontU)) {
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
- } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
- } else if (isa<SelectInst>(FrontU)) {
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
- } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
- II && isTriviallyVectorizable(II->getIntrinsicID())) {
- for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
- if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
- if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
- Value *FrontV = Item.front().first->get();
- Use *U = IL.first;
- return !U || (cast<Instruction>(U->get())->getOperand(Op) ==
- cast<Instruction>(FrontV)->getOperand(Op));
- }))
- return false;
- continue;
+ })) {
+ // Check the operator is one that we support.
+ if (isa<BinaryOperator, CmpInst>(FrontU)) {
+ // We exclude div/rem in case they hit UB from poison lanes.
+ if (auto *BO = dyn_cast<BinaryOperator>(FrontU);
+ BO && BO->isIntDivRem())
+ return false;
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
+ continue;
+ } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+ continue;
+ } else if (isa<SelectInst>(FrontU)) {
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
+ continue;
+ } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
+ II && isTriviallyVectorizable(II->getIntrinsicID())) {
+ for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
+ if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
+ if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
+ Value *FrontV = Item.front().first->get();
+ Use *U = IL.first;
+ return !U || (cast<Instruction>(U->get())->getOperand(Op) ==
+ cast<Instruction>(FrontV)->getOperand(Op));
+ }))
+ return false;
+ continue;
+ }
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op));
}
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op));
+ continue;
}
- } else {
- return false;
}
+
+ if (isFreeConcat(Item, TTI)) {
+ ConcatLeafs.insert(FrontU);
+ continue;
+ }
+
+ return false;
}
if (NumVisited <= 1)
@@ -1901,7 +1960,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
// If we got this far, we know the shuffles are superfluous and can be
// removed. Scan through again and generate the new tree of instructions.
- Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs, Builder);
+ Builder.SetInsertPoint(&I);
+ Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs,
+ ConcatLeafs, Builder);
replaceValue(I, *V);
return true;
}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
index c085e10..3ee8ba5 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
@@ -22,9 +22,9 @@ define void @add4(ptr noalias noundef %x, ptr noalias noundef %y, i32 noundef %n
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC24:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP2]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -403,12 +403,12 @@ define void @addmul(ptr noalias noundef %x, ptr noundef %y, ptr noundef %z, i32
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[Z:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC31:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP3]], align 2
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]]
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP4]], [[WIDE_VEC36]]
; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll
index d725329..7aba1bb 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll
@@ -82,15 +82,9 @@ define <8 x i8> @concata_addmul_small(<4 x i8> %a1, <4 x i8> %a2, <8 x i8> %b, <
define <8 x i32> @concata_addmul_big(<4 x i32> %a1, <4 x i32> %a2, <8 x i32> %b, <8 x i32> %c) {
; CHECK-LABEL: @concata_addmul_big(
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[XB:%.*]] = mul <4 x i32> [[A1:%.*]], [[BB]]
-; CHECK-NEXT: [[XT:%.*]] = mul <4 x i32> [[A2:%.*]], [[BT]]
-; CHECK-NEXT: [[YB:%.*]] = add <4 x i32> [[XB]], [[CB]]
-; CHECK-NEXT: [[YT:%.*]] = add <4 x i32> [[XT]], [[CT]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[YB]], <4 x i32> [[YT]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add <8 x i32> [[TMP2]], [[C:%.*]]
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%bb = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -107,29 +101,11 @@ define <8 x i32> @concata_addmul_big(<4 x i32> %a1, <4 x i32> %a2, <8 x i32> %b,
define <16 x i32> @concata_addmul_bigger(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) {
; CHECK-LABEL: @concata_addmul_bigger(
-; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]]
-; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]]
-; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], [[B3]]
-; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]]
-; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]]
-; CHECK-NEXT: [[Y2:%.*]] = add <4 x i32> [[X2]], [[C2]]
-; CHECK-NEXT: [[Y3:%.*]] = add <4 x i32> [[X3]], [[C3]]
-; CHECK-NEXT: [[Y4:%.*]] = add <4 x i32> [[X4]], [[C4]]
-; CHECK-NEXT: [[CC1:%.*]] = shufflevector <4 x i32> [[Y1]], <4 x i32> [[Y2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CC2:%.*]] = shufflevector <4 x i32> [[Y3]], <4 x i32> [[Y4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[CC1]], <8 x i32> [[CC2]], <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> [[A3A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> [[A1A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add <16 x i32> [[TMP4]], [[C:%.*]]
; CHECK-NEXT: ret <16 x i32> [[R]]
;
%a1 = shufflevector <4 x i32> %a1a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -213,34 +189,13 @@ define <16 x i32> @concata_addmul_bigger_undef(<4 x i32> %a1a, <4 x i32> %a2a, <
define <16 x i32> @splat_concat(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) {
; CHECK-LABEL: @splat_concat(
-; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[SPLATA:%.*]] = shufflevector <4 x i32> [[A4A]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]]
-; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]]
-; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], [[B3]]
-; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]]
-; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]]
-; CHECK-NEXT: [[Y2:%.*]] = add <4 x i32> [[X2]], [[C2]]
-; CHECK-NEXT: [[Y3:%.*]] = add <4 x i32> [[X3]], [[C3]]
-; CHECK-NEXT: [[Y4:%.*]] = add <4 x i32> [[X4]], [[C4]]
-; CHECK-NEXT: [[Z1:%.*]] = xor <4 x i32> [[Y1]], [[SPLATA]]
-; CHECK-NEXT: [[Z2:%.*]] = xor <4 x i32> [[Y2]], [[SPLATA]]
-; CHECK-NEXT: [[Z3:%.*]] = xor <4 x i32> [[Y3]], [[SPLATA]]
-; CHECK-NEXT: [[Z4:%.*]] = xor <4 x i32> [[Y4]], [[SPLATA]]
-; CHECK-NEXT: [[CC1:%.*]] = shufflevector <4 x i32> [[Z1]], <4 x i32> [[Z2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CC2:%.*]] = shufflevector <4 x i32> [[Z3]], <4 x i32> [[Z4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[CC1]], <8 x i32> [[CC2]], <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> [[A3A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> [[A1A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[B:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[TMP4]], [[C:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[A4A]], <4 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = xor <16 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: ret <16 x i32> [[R]]
;
%a1 = shufflevector <4 x i32> %a1a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -276,33 +231,15 @@ define <16 x i32> @splat_concat(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a,
define <16 x i32> @two_concats(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) {
; CHECK-LABEL: @two_concats(
-; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]]
-; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]]
-; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], [[B3]]
-; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]]
-; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]]
-; CHECK-NEXT: [[Y2:%.*]] = add <4 x i32> [[X2]], [[C2]]
-; CHECK-NEXT: [[Y3:%.*]] = add <4 x i32> [[X3]], [[C3]]
-; CHECK-NEXT: [[Y4:%.*]] = add <4 x i32> [[X4]], [[C4]]
-; CHECK-NEXT: [[Z1:%.*]] = xor <4 x i32> [[Y1]], [[A1]]
-; CHECK-NEXT: [[Z2:%.*]] = xor <4 x i32> [[Y2]], [[A1]]
-; CHECK-NEXT: [[Z3:%.*]] = xor <4 x i32> [[Y3]], [[A1]]
-; CHECK-NEXT: [[Z4:%.*]] = xor <4 x i32> [[Y4]], [[A1]]
-; CHECK-NEXT: [[CC1:%.*]] = shufflevector <4 x i32> [[Z1]], <4 x i32> [[Z2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CC2:%.*]] = shufflevector <4 x i32> [[Z3]], <4 x i32> [[Z4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[CC1]], <8 x i32> [[CC2]], <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> [[A3A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> [[A1A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[B:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[TMP4]], [[C:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[A1A]], <4 x i32> [[A1A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[A1A]], <4 x i32> [[A1A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[R:%.*]] = xor <16 x i32> [[TMP5]], [[TMP8]]
; CHECK-NEXT: ret <16 x i32> [[R]]
;
%a1 = shufflevector <4 x i32> %a1a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -338,57 +275,15 @@ define <16 x i32> @two_concats(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <
define <16 x double> @konkat(<16 x double> %wide.vec, <16 x double> %wide.vec115, <2 x double> %l27, <2 x double> %l28, <2 x double> %l29, <2 x double> %l30) {
; CHECK-LABEL: @konkat(
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[L27:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[L32:%.*]] = shufflevector <2 x double> [[L27]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT: [[BROADCAST_SPLAT102:%.*]] = shufflevector <2 x double> [[L32]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLAT104:%.*]] = shufflevector <2 x double> [[L28:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[L33:%.*]] = shufflevector <2 x double> [[L28]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT: [[BROADCAST_SPLAT106:%.*]] = shufflevector <2 x double> [[L33]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLAT108:%.*]] = shufflevector <2 x double> [[L29:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[L34:%.*]] = shufflevector <2 x double> [[L29]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT: [[BROADCAST_SPLAT110:%.*]] = shufflevector <2 x double> [[L34]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLAT112:%.*]] = shufflevector <2 x double> [[L30:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[L35:%.*]] = shufflevector <2 x double> [[L30]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT: [[BROADCAST_SPLAT114:%.*]] = shufflevector <2 x double> [[L35]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC:%.*]], <16 x double> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT: [[STRIDED_VEC94:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT: [[STRIDED_VEC95:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT: [[STRIDED_VEC96:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT: [[STRIDED_VEC97:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT: [[STRIDED_VEC98:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT: [[STRIDED_VEC99:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT: [[STRIDED_VEC100:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC94]], [[BROADCAST_SPLAT102]]
-; CHECK-NEXT: [[TMP3:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC95]], [[BROADCAST_SPLAT104]]
-; CHECK-NEXT: [[TMP4:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC96]], [[BROADCAST_SPLAT106]]
-; CHECK-NEXT: [[TMP5:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC97]], [[BROADCAST_SPLAT108]]
-; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC98]], [[BROADCAST_SPLAT110]]
-; CHECK-NEXT: [[TMP7:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC99]], [[BROADCAST_SPLAT112]]
-; CHECK-NEXT: [[TMP8:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC100]], [[BROADCAST_SPLAT114]]
-; CHECK-NEXT: [[STRIDED_VEC116:%.*]] = shufflevector <16 x double> [[WIDE_VEC115:%.*]], <16 x double> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT: [[STRIDED_VEC117:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT: [[STRIDED_VEC118:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT: [[STRIDED_VEC119:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT: [[STRIDED_VEC120:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT: [[STRIDED_VEC121:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT: [[STRIDED_VEC122:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT: [[STRIDED_VEC123:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT: [[TMP9:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC116]], [[TMP1]]
-; CHECK-NEXT: [[TMP10:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC117]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC118]], [[TMP3]]
-; CHECK-NEXT: [[TMP12:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC119]], [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC120]], [[TMP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC121]], [[TMP6]]
-; CHECK-NEXT: [[TMP15:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC122]], [[TMP7]]
-; CHECK-NEXT: [[TMP16:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC123]], [[TMP8]]
-; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x double> [[TMP17]], <4 x double> [[TMP18]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x double> [[TMP19]], <4 x double> [[TMP20]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[L27:%.*]], <2 x double> [[L28:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[L29:%.*]], <2 x double> [[L30:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[L27]], <2 x double> [[L28]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[L29]], <2 x double> [[L30]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP8:%.*]] = fmul reassoc nsz contract <16 x double> [[WIDE_VEC:%.*]], [[TMP7]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd reassoc nsz contract <16 x double> [[WIDE_VEC115:%.*]], [[TMP8]]
; CHECK-NEXT: ret <16 x double> [[INTERLEAVED_VEC]]
;
%broadcast.splat = shufflevector <2 x double> %l27, <2 x double> poison, <2 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 9ad042c..e6899d1 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -262,9 +262,9 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {
define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @splattwice(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
%as = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> zeroinitializer
@@ -352,9 +352,9 @@ define <8 x half> @constantsplatf(<8 x half> %a) {
define <8 x i8> @inner_shuffle(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: @inner_shuffle(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -839,16 +839,16 @@ define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) {
; CHECK-LABEL: @v8f64interleave(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <16 x double> [[WIDE_VEC]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]]
-; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x double> [[WIDE_VEC34]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP0]], 7
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -56
-; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]]
+; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 7
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 -56
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <16 x double> [[WIDE_VEC]], [[TMP6]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x double> [[WIDE_VEC34]], [[TMP7]]
+; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -905,10 +905,10 @@ entry:
define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: @singleop(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16>
-; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[R:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
@@ -953,9 +953,9 @@ define <4 x float> @fadd_mismatched_types(<4 x float> %x, <4 x float> %y) {
define void @maximal_legal_fpmath(ptr %addr1, ptr %addr2, ptr %result, float %val) {
; CHECK-LABEL: @maximal_legal_fpmath(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[VAL:%.*]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[SPLATINSERT]], <4 x float> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[VEC1:%.*]] = load <16 x float>, ptr [[ADDR1:%.*]], align 4
; CHECK-NEXT: [[VEC2:%.*]] = load <16 x float>, ptr [[ADDR2:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[SPLATINSERT]], <4 x float> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <16 x float> [[TMP1]], [[VEC2]]
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd reassoc contract <16 x float> [[VEC1]], [[TMP2]]
; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[RESULT:%.*]], align 4