aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Green <david.green@arm.com>2024-06-25 07:55:08 +0100
committerGitHub <noreply@github.com>2024-06-25 07:55:08 +0100
commitefa8463ab90147aacd4647eb7715763978235890 (patch)
tree100fe82117a075ce79e5cd31c6b17b4478e5a48c
parent4c91b49bab0728d4bc136aa33c4aeb4e8ea37d01 (diff)
downloadllvm-efa8463ab90147aacd4647eb7715763978235890.zip
llvm-efa8463ab90147aacd4647eb7715763978235890.tar.gz
llvm-efa8463ab90147aacd4647eb7715763978235890.tar.bz2
[VectorCombine] Add free concats to shuffleToIdentity. (#94954)
This is another relatively small adjustment to shuffleToIdentity, which has had a few knock-on effects that require a few more changes. It attempts to detect free concats, which will be legalized to multiple vector operations. For example, the lanes may be '[a[0], a[1], b[0], b[1]]' where a and b are v2f64 under AArch64. In order to do this: - isFreeConcat detects whether the input has piece-wise identities from multiple inputs that can become a concat. - A tree of concat shuffles is created to concatenate the input values into a single vector. This is a little different to most other inputs as they are created from multiple values that are being combined together, and we cannot rely on the Lane0 insert location always being valid. - The insert location is changed to the original location instead of updating per item, which ensures it is valid due to the order in which we visit and create items.
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp143
-rw-r--r--llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll14
-rw-r--r--llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll171
-rw-r--r--llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll40
4 files changed, 162 insertions, 206 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 5b9fe1c..7fa1b433 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1703,9 +1703,44 @@ generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
return NItem;
}
+/// Detect concat of multiple values into a vector
+static bool isFreeConcat(ArrayRef<InstLane> Item,
+ const TargetTransformInfo &TTI) {
+ auto *Ty = cast<FixedVectorType>(Item.front().first->get()->getType());
+ unsigned NumElts = Ty->getNumElements();
+ if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0)
+ return false;
+
+ // Check that the concat is free, usually meaning that the type will be split
+ // during legalization.
+ SmallVector<int, 16> ConcatMask(NumElts * 2);
+ std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
+ if (TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, ConcatMask,
+ TTI::TCK_RecipThroughput) != 0)
+ return false;
+
+ unsigned NumSlices = Item.size() / NumElts;
+ // Currently we generate a tree of shuffles for the concats, which limits us
+ // to a power2.
+ if (!isPowerOf2_32(NumSlices))
+ return false;
+ for (unsigned Slice = 0; Slice < NumSlices; ++Slice) {
+ Use *SliceV = Item[Slice * NumElts].first;
+ if (!SliceV || SliceV->get()->getType() != Ty)
+ return false;
+ for (unsigned Elt = 0; Elt < NumElts; ++Elt) {
+ auto [V, Lane] = Item[Slice * NumElts + Elt];
+ if (Lane != static_cast<int>(Elt) || SliceV->get() != V->get())
+ return false;
+ }
+ }
+ return true;
+}
+
static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
const SmallPtrSet<Use *, 4> &IdentityLeafs,
const SmallPtrSet<Use *, 4> &SplatLeafs,
+ const SmallPtrSet<Use *, 4> &ConcatLeafs,
IRBuilder<> &Builder) {
auto [FrontU, FrontLane] = Item.front();
@@ -1713,13 +1748,28 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
return FrontU->get();
}
if (SplatLeafs.contains(FrontU)) {
- if (auto *ILI = dyn_cast<Instruction>(FrontU))
- Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef());
- else if (auto *Arg = dyn_cast<Argument>(FrontU))
- Builder.SetInsertPointPastAllocas(Arg->getParent());
SmallVector<int, 16> Mask(Ty->getNumElements(), FrontLane);
return Builder.CreateShuffleVector(FrontU->get(), Mask);
}
+ if (ConcatLeafs.contains(FrontU)) {
+ unsigned NumElts =
+ cast<FixedVectorType>(FrontU->get()->getType())->getNumElements();
+ SmallVector<Value *> Values(Item.size() / NumElts, nullptr);
+ for (unsigned S = 0; S < Values.size(); ++S)
+ Values[S] = Item[S * NumElts].first->get();
+
+ while (Values.size() > 1) {
+ NumElts *= 2;
+ SmallVector<int, 16> Mask(NumElts, 0);
+ std::iota(Mask.begin(), Mask.end(), 0);
+ SmallVector<Value *> NewValues(Values.size() / 2, nullptr);
+ for (unsigned S = 0; S < NewValues.size(); ++S)
+ NewValues[S] =
+ Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
+ Values = NewValues;
+ }
+ return Values[0];
+ }
auto *I = cast<Instruction>(FrontU->get());
auto *II = dyn_cast<IntrinsicInst>(I);
@@ -1730,8 +1780,9 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
Ops[Idx] = II->getOperand(Idx);
continue;
}
- Ops[Idx] = generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx),
- Ty, IdentityLeafs, SplatLeafs, Builder);
+ Ops[Idx] =
+ generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx), Ty,
+ IdentityLeafs, SplatLeafs, ConcatLeafs, Builder);
}
SmallVector<Value *, 8> ValueList;
@@ -1739,7 +1790,6 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
if (Lane.first)
ValueList.push_back(Lane.first->get());
- Builder.SetInsertPoint(I);
Type *DstTy =
FixedVectorType::get(I->getType()->getScalarType(), Ty->getNumElements());
if (auto *BI = dyn_cast<BinaryOperator>(I)) {
@@ -1790,7 +1840,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
SmallVector<SmallVector<InstLane>> Worklist;
Worklist.push_back(Start);
- SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs;
+ SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
unsigned NumVisited = 0;
while (!Worklist.empty()) {
@@ -1839,7 +1889,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
// We need each element to be the same type of value, and check that each
// element has a single use.
- if (!all_of(drop_begin(Item), [Item](InstLane IL) {
+ if (all_of(drop_begin(Item), [Item](InstLane IL) {
Value *FrontV = Item.front().first->get();
if (!IL.first)
return true;
@@ -1860,40 +1910,49 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
return !II || (isa<IntrinsicInst>(FrontV) &&
II->getIntrinsicID() ==
cast<IntrinsicInst>(FrontV)->getIntrinsicID());
- }))
- return false;
-
- // Check the operator is one that we support. We exclude div/rem in case
- // they hit UB from poison lanes.
- if ((isa<BinaryOperator>(FrontU) &&
- !cast<BinaryOperator>(FrontU)->isIntDivRem()) ||
- isa<CmpInst>(FrontU)) {
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
- } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
- } else if (isa<SelectInst>(FrontU)) {
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
- } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
- II && isTriviallyVectorizable(II->getIntrinsicID())) {
- for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
- if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
- if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
- Value *FrontV = Item.front().first->get();
- Use *U = IL.first;
- return !U || (cast<Instruction>(U->get())->getOperand(Op) ==
- cast<Instruction>(FrontV)->getOperand(Op));
- }))
- return false;
- continue;
+ })) {
+ // Check the operator is one that we support.
+ if (isa<BinaryOperator, CmpInst>(FrontU)) {
+ // We exclude div/rem in case they hit UB from poison lanes.
+ if (auto *BO = dyn_cast<BinaryOperator>(FrontU);
+ BO && BO->isIntDivRem())
+ return false;
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
+ continue;
+ } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+ continue;
+ } else if (isa<SelectInst>(FrontU)) {
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
+ continue;
+ } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
+ II && isTriviallyVectorizable(II->getIntrinsicID())) {
+ for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
+ if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
+ if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
+ Value *FrontV = Item.front().first->get();
+ Use *U = IL.first;
+ return !U || (cast<Instruction>(U->get())->getOperand(Op) ==
+ cast<Instruction>(FrontV)->getOperand(Op));
+ }))
+ return false;
+ continue;
+ }
+ Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op));
}
- Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op));
+ continue;
}
- } else {
- return false;
}
+
+ if (isFreeConcat(Item, TTI)) {
+ ConcatLeafs.insert(FrontU);
+ continue;
+ }
+
+ return false;
}
if (NumVisited <= 1)
@@ -1901,7 +1960,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
// If we got this far, we know the shuffles are superfluous and can be
// removed. Scan through again and generate the new tree of instructions.
- Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs, Builder);
+ Builder.SetInsertPoint(&I);
+ Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs,
+ ConcatLeafs, Builder);
replaceValue(I, *V);
return true;
}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
index c085e10..3ee8ba5 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll
@@ -22,9 +22,9 @@ define void @add4(ptr noalias noundef %x, ptr noalias noundef %y, i32 noundef %n
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC24:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP2]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]]
; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -403,12 +403,12 @@ define void @addmul(ptr noalias noundef %x, ptr noundef %y, ptr noundef %z, i32
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[Z:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_VEC31:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP3]], align 2
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]]
-; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP4]], [[WIDE_VEC36]]
; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll
index d725329..7aba1bb 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll
@@ -82,15 +82,9 @@ define <8 x i8> @concata_addmul_small(<4 x i8> %a1, <4 x i8> %a2, <8 x i8> %b, <
define <8 x i32> @concata_addmul_big(<4 x i32> %a1, <4 x i32> %a2, <8 x i32> %b, <8 x i32> %c) {
; CHECK-LABEL: @concata_addmul_big(
-; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[XB:%.*]] = mul <4 x i32> [[A1:%.*]], [[BB]]
-; CHECK-NEXT: [[XT:%.*]] = mul <4 x i32> [[A2:%.*]], [[BT]]
-; CHECK-NEXT: [[YB:%.*]] = add <4 x i32> [[XB]], [[CB]]
-; CHECK-NEXT: [[YT:%.*]] = add <4 x i32> [[XT]], [[CT]]
-; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[YB]], <4 x i32> [[YT]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add <8 x i32> [[TMP2]], [[C:%.*]]
; CHECK-NEXT: ret <8 x i32> [[R]]
;
%bb = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -107,29 +101,11 @@ define <8 x i32> @concata_addmul_big(<4 x i32> %a1, <4 x i32> %a2, <8 x i32> %b,
define <16 x i32> @concata_addmul_bigger(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) {
; CHECK-LABEL: @concata_addmul_bigger(
-; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]]
-; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]]
-; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], [[B3]]
-; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]]
-; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]]
-; CHECK-NEXT: [[Y2:%.*]] = add <4 x i32> [[X2]], [[C2]]
-; CHECK-NEXT: [[Y3:%.*]] = add <4 x i32> [[X3]], [[C3]]
-; CHECK-NEXT: [[Y4:%.*]] = add <4 x i32> [[X4]], [[C4]]
-; CHECK-NEXT: [[CC1:%.*]] = shufflevector <4 x i32> [[Y1]], <4 x i32> [[Y2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CC2:%.*]] = shufflevector <4 x i32> [[Y3]], <4 x i32> [[Y4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[CC1]], <8 x i32> [[CC2]], <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> [[A3A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> [[A1A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[B:%.*]]
+; CHECK-NEXT: [[R:%.*]] = add <16 x i32> [[TMP4]], [[C:%.*]]
; CHECK-NEXT: ret <16 x i32> [[R]]
;
%a1 = shufflevector <4 x i32> %a1a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -213,34 +189,13 @@ define <16 x i32> @concata_addmul_bigger_undef(<4 x i32> %a1a, <4 x i32> %a2a, <
define <16 x i32> @splat_concat(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) {
; CHECK-LABEL: @splat_concat(
-; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[SPLATA:%.*]] = shufflevector <4 x i32> [[A4A]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]]
-; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]]
-; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], [[B3]]
-; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]]
-; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]]
-; CHECK-NEXT: [[Y2:%.*]] = add <4 x i32> [[X2]], [[C2]]
-; CHECK-NEXT: [[Y3:%.*]] = add <4 x i32> [[X3]], [[C3]]
-; CHECK-NEXT: [[Y4:%.*]] = add <4 x i32> [[X4]], [[C4]]
-; CHECK-NEXT: [[Z1:%.*]] = xor <4 x i32> [[Y1]], [[SPLATA]]
-; CHECK-NEXT: [[Z2:%.*]] = xor <4 x i32> [[Y2]], [[SPLATA]]
-; CHECK-NEXT: [[Z3:%.*]] = xor <4 x i32> [[Y3]], [[SPLATA]]
-; CHECK-NEXT: [[Z4:%.*]] = xor <4 x i32> [[Y4]], [[SPLATA]]
-; CHECK-NEXT: [[CC1:%.*]] = shufflevector <4 x i32> [[Z1]], <4 x i32> [[Z2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CC2:%.*]] = shufflevector <4 x i32> [[Z3]], <4 x i32> [[Z4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[CC1]], <8 x i32> [[CC2]], <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> [[A3A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> [[A1A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[B:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[TMP4]], [[C:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[A4A]], <4 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = xor <16 x i32> [[TMP5]], [[TMP6]]
; CHECK-NEXT: ret <16 x i32> [[R]]
;
%a1 = shufflevector <4 x i32> %a1a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -276,33 +231,15 @@ define <16 x i32> @splat_concat(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a,
define <16 x i32> @two_concats(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) {
; CHECK-LABEL: @two_concats(
-; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> <i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]]
-; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]]
-; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], [[B3]]
-; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]]
-; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]]
-; CHECK-NEXT: [[Y2:%.*]] = add <4 x i32> [[X2]], [[C2]]
-; CHECK-NEXT: [[Y3:%.*]] = add <4 x i32> [[X3]], [[C3]]
-; CHECK-NEXT: [[Y4:%.*]] = add <4 x i32> [[X4]], [[C4]]
-; CHECK-NEXT: [[Z1:%.*]] = xor <4 x i32> [[Y1]], [[A1]]
-; CHECK-NEXT: [[Z2:%.*]] = xor <4 x i32> [[Y2]], [[A1]]
-; CHECK-NEXT: [[Z3:%.*]] = xor <4 x i32> [[Y3]], [[A1]]
-; CHECK-NEXT: [[Z4:%.*]] = xor <4 x i32> [[Y4]], [[A1]]
-; CHECK-NEXT: [[CC1:%.*]] = shufflevector <4 x i32> [[Z1]], <4 x i32> [[Z2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[CC2:%.*]] = shufflevector <4 x i32> [[Z3]], <4 x i32> [[Z4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[CC1]], <8 x i32> [[CC2]], <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> [[A3A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> [[A1A:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[B:%.*]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[TMP4]], [[C:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[A1A]], <4 x i32> [[A1A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[A1A]], <4 x i32> [[A1A]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[R:%.*]] = xor <16 x i32> [[TMP5]], [[TMP8]]
; CHECK-NEXT: ret <16 x i32> [[R]]
;
%a1 = shufflevector <4 x i32> %a1a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -338,57 +275,15 @@ define <16 x i32> @two_concats(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <
define <16 x double> @konkat(<16 x double> %wide.vec, <16 x double> %wide.vec115, <2 x double> %l27, <2 x double> %l28, <2 x double> %l29, <2 x double> %l30) {
; CHECK-LABEL: @konkat(
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[L27:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[L32:%.*]] = shufflevector <2 x double> [[L27]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT: [[BROADCAST_SPLAT102:%.*]] = shufflevector <2 x double> [[L32]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLAT104:%.*]] = shufflevector <2 x double> [[L28:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[L33:%.*]] = shufflevector <2 x double> [[L28]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT: [[BROADCAST_SPLAT106:%.*]] = shufflevector <2 x double> [[L33]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLAT108:%.*]] = shufflevector <2 x double> [[L29:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[L34:%.*]] = shufflevector <2 x double> [[L29]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT: [[BROADCAST_SPLAT110:%.*]] = shufflevector <2 x double> [[L34]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLAT112:%.*]] = shufflevector <2 x double> [[L30:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[L35:%.*]] = shufflevector <2 x double> [[L30]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
-; CHECK-NEXT: [[BROADCAST_SPLAT114:%.*]] = shufflevector <2 x double> [[L35]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC:%.*]], <16 x double> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT: [[STRIDED_VEC94:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT: [[STRIDED_VEC95:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT: [[STRIDED_VEC96:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT: [[STRIDED_VEC97:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT: [[STRIDED_VEC98:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT: [[STRIDED_VEC99:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT: [[STRIDED_VEC100:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC94]], [[BROADCAST_SPLAT102]]
-; CHECK-NEXT: [[TMP3:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC95]], [[BROADCAST_SPLAT104]]
-; CHECK-NEXT: [[TMP4:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC96]], [[BROADCAST_SPLAT106]]
-; CHECK-NEXT: [[TMP5:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC97]], [[BROADCAST_SPLAT108]]
-; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC98]], [[BROADCAST_SPLAT110]]
-; CHECK-NEXT: [[TMP7:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC99]], [[BROADCAST_SPLAT112]]
-; CHECK-NEXT: [[TMP8:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC100]], [[BROADCAST_SPLAT114]]
-; CHECK-NEXT: [[STRIDED_VEC116:%.*]] = shufflevector <16 x double> [[WIDE_VEC115:%.*]], <16 x double> poison, <2 x i32> <i32 0, i32 8>
-; CHECK-NEXT: [[STRIDED_VEC117:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 1, i32 9>
-; CHECK-NEXT: [[STRIDED_VEC118:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 2, i32 10>
-; CHECK-NEXT: [[STRIDED_VEC119:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 3, i32 11>
-; CHECK-NEXT: [[STRIDED_VEC120:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 4, i32 12>
-; CHECK-NEXT: [[STRIDED_VEC121:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 5, i32 13>
-; CHECK-NEXT: [[STRIDED_VEC122:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 6, i32 14>
-; CHECK-NEXT: [[STRIDED_VEC123:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT: [[TMP9:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC116]], [[TMP1]]
-; CHECK-NEXT: [[TMP10:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC117]], [[TMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC118]], [[TMP3]]
-; CHECK-NEXT: [[TMP12:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC119]], [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC120]], [[TMP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC121]], [[TMP6]]
-; CHECK-NEXT: [[TMP15:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC122]], [[TMP7]]
-; CHECK-NEXT: [[TMP16:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC123]], [[TMP8]]
-; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> [[TMP16]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x double> [[TMP17]], <4 x double> [[TMP18]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x double> [[TMP19]], <4 x double> [[TMP20]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[L27:%.*]], <2 x double> [[L28:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[L29:%.*]], <2 x double> [[L30:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[L27]], <2 x double> [[L28]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[L29]], <2 x double> [[L30]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP8:%.*]] = fmul reassoc nsz contract <16 x double> [[WIDE_VEC:%.*]], [[TMP7]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd reassoc nsz contract <16 x double> [[WIDE_VEC115:%.*]], [[TMP8]]
; CHECK-NEXT: ret <16 x double> [[INTERLEAVED_VEC]]
;
%broadcast.splat = shufflevector <2 x double> %l27, <2 x double> poison, <2 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 9ad042c..e6899d1 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -262,9 +262,9 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {
define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @splattwice(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
%as = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> zeroinitializer
@@ -352,9 +352,9 @@ define <8 x half> @constantsplatf(<8 x half> %a) {
define <8 x i8> @inner_shuffle(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK-LABEL: @inner_shuffle(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[TMP2]], [[TMP1]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x i8> [[R]]
;
%ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -839,16 +839,16 @@ define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) {
; CHECK-LABEL: @v8f64interleave(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <16 x double> [[WIDE_VEC]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]]
-; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP4]], align 8
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x double> [[WIDE_VEC34]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP0]], 7
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -56
-; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]]
+; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 7
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 -56
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <16 x double> [[WIDE_VEC]], [[TMP6]]
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x double> [[WIDE_VEC34]], [[TMP7]]
+; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8
; CHECK-NEXT: ret void
;
entry:
@@ -905,10 +905,10 @@ entry:
define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: @singleop(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16>
-; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[R:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[R]]
;
@@ -953,9 +953,9 @@ define <4 x float> @fadd_mismatched_types(<4 x float> %x, <4 x float> %y) {
define void @maximal_legal_fpmath(ptr %addr1, ptr %addr2, ptr %result, float %val) {
; CHECK-LABEL: @maximal_legal_fpmath(
; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[VAL:%.*]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[SPLATINSERT]], <4 x float> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[VEC1:%.*]] = load <16 x float>, ptr [[ADDR1:%.*]], align 4
; CHECK-NEXT: [[VEC2:%.*]] = load <16 x float>, ptr [[ADDR2:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[SPLATINSERT]], <4 x float> poison, <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <16 x float> [[TMP1]], [[VEC2]]
; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd reassoc contract <16 x float> [[VEC1]], [[TMP2]]
; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[RESULT:%.*]], align 4