From efa8463ab90147aacd4647eb7715763978235890 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 25 Jun 2024 07:55:08 +0100 Subject: [VectorCombine] Add free concats to shuffleToIdentity. (#94954) This is another relatively small adjustment to shuffleToIdentity, which has had a few knock-on effects that require a few more changes. It attempts to detect free concats that will be legalized to multiple vector operations, for example when the lanes are '[a[0], a[1], b[0], b[1]]' and a and b are v2f64 under aarch64. In order to do this: - isFreeConcat detects whether the input has piece-wise identities from multiple inputs that can become a concat. - A tree of concat shuffles is created to concatenate the input values into a single vector. This is a little different to most other inputs as they are created from multiple values that are being combined together, and we cannot rely on the Lane0 insert location always being valid. - The insert location is changed to the original location instead of updating per item, which ensures it is valid due to the order in which we visit and create items. 
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 143 ++++++++++++----- .../AArch64/interleavevectorization.ll | 14 +- .../AArch64/shuffletoidentity-concat.ll | 171 ++++----------------- .../VectorCombine/AArch64/shuffletoidentity.ll | 40 ++--- 4 files changed, 162 insertions(+), 206 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 5b9fe1c..7fa1b433 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1703,9 +1703,44 @@ generateInstLaneVectorFromOperand(ArrayRef Item, int Op) { return NItem; } +/// Detect concat of multiple values into a vector +static bool isFreeConcat(ArrayRef Item, + const TargetTransformInfo &TTI) { + auto *Ty = cast(Item.front().first->get()->getType()); + unsigned NumElts = Ty->getNumElements(); + if (Item.size() == NumElts || NumElts == 1 || Item.size() % NumElts != 0) + return false; + + // Check that the concat is free, usually meaning that the type will be split + // during legalization. + SmallVector ConcatMask(NumElts * 2); + std::iota(ConcatMask.begin(), ConcatMask.end(), 0); + if (TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, ConcatMask, + TTI::TCK_RecipThroughput) != 0) + return false; + + unsigned NumSlices = Item.size() / NumElts; + // Currently we generate a tree of shuffles for the concats, which limits us + // to a power2. 
+ if (!isPowerOf2_32(NumSlices)) + return false; + for (unsigned Slice = 0; Slice < NumSlices; ++Slice) { + Use *SliceV = Item[Slice * NumElts].first; + if (!SliceV || SliceV->get()->getType() != Ty) + return false; + for (unsigned Elt = 0; Elt < NumElts; ++Elt) { + auto [V, Lane] = Item[Slice * NumElts + Elt]; + if (Lane != static_cast(Elt) || SliceV->get() != V->get()) + return false; + } + } + return true; +} + static Value *generateNewInstTree(ArrayRef Item, FixedVectorType *Ty, const SmallPtrSet &IdentityLeafs, const SmallPtrSet &SplatLeafs, + const SmallPtrSet &ConcatLeafs, IRBuilder<> &Builder) { auto [FrontU, FrontLane] = Item.front(); @@ -1713,13 +1748,28 @@ static Value *generateNewInstTree(ArrayRef Item, FixedVectorType *Ty, return FrontU->get(); } if (SplatLeafs.contains(FrontU)) { - if (auto *ILI = dyn_cast(FrontU)) - Builder.SetInsertPoint(*ILI->getInsertionPointAfterDef()); - else if (auto *Arg = dyn_cast(FrontU)) - Builder.SetInsertPointPastAllocas(Arg->getParent()); SmallVector Mask(Ty->getNumElements(), FrontLane); return Builder.CreateShuffleVector(FrontU->get(), Mask); } + if (ConcatLeafs.contains(FrontU)) { + unsigned NumElts = + cast(FrontU->get()->getType())->getNumElements(); + SmallVector Values(Item.size() / NumElts, nullptr); + for (unsigned S = 0; S < Values.size(); ++S) + Values[S] = Item[S * NumElts].first->get(); + + while (Values.size() > 1) { + NumElts *= 2; + SmallVector Mask(NumElts, 0); + std::iota(Mask.begin(), Mask.end(), 0); + SmallVector NewValues(Values.size() / 2, nullptr); + for (unsigned S = 0; S < NewValues.size(); ++S) + NewValues[S] = + Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask); + Values = NewValues; + } + return Values[0]; + } auto *I = cast(FrontU->get()); auto *II = dyn_cast(I); @@ -1730,8 +1780,9 @@ static Value *generateNewInstTree(ArrayRef Item, FixedVectorType *Ty, Ops[Idx] = II->getOperand(Idx); continue; } - Ops[Idx] = generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx), 
- Ty, IdentityLeafs, SplatLeafs, Builder); + Ops[Idx] = + generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx), Ty, + IdentityLeafs, SplatLeafs, ConcatLeafs, Builder); } SmallVector ValueList; @@ -1739,7 +1790,6 @@ static Value *generateNewInstTree(ArrayRef Item, FixedVectorType *Ty, if (Lane.first) ValueList.push_back(Lane.first->get()); - Builder.SetInsertPoint(I); Type *DstTy = FixedVectorType::get(I->getType()->getScalarType(), Ty->getNumElements()); if (auto *BI = dyn_cast(I)) { @@ -1790,7 +1840,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { SmallVector> Worklist; Worklist.push_back(Start); - SmallPtrSet IdentityLeafs, SplatLeafs; + SmallPtrSet IdentityLeafs, SplatLeafs, ConcatLeafs; unsigned NumVisited = 0; while (!Worklist.empty()) { @@ -1839,7 +1889,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { // We need each element to be the same type of value, and check that each // element has a single use. - if (!all_of(drop_begin(Item), [Item](InstLane IL) { + if (all_of(drop_begin(Item), [Item](InstLane IL) { Value *FrontV = Item.front().first->get(); if (!IL.first) return true; @@ -1860,40 +1910,49 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { return !II || (isa(FrontV) && II->getIntrinsicID() == cast(FrontV)->getIntrinsicID()); - })) - return false; - - // Check the operator is one that we support. We exclude div/rem in case - // they hit UB from poison lanes. 
- if ((isa(FrontU) && - !cast(FrontU)->isIntDivRem()) || - isa(FrontU)) { - Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0)); - Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1)); - } else if (isa(FrontU)) { - Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0)); - } else if (isa(FrontU)) { - Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0)); - Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1)); - Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2)); - } else if (auto *II = dyn_cast(FrontU); - II && isTriviallyVectorizable(II->getIntrinsicID())) { - for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) { - if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) { - if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) { - Value *FrontV = Item.front().first->get(); - Use *U = IL.first; - return !U || (cast(U->get())->getOperand(Op) == - cast(FrontV)->getOperand(Op)); - })) - return false; - continue; + })) { + // Check the operator is one that we support. + if (isa(FrontU)) { + // We exclude div/rem in case they hit UB from poison lanes. 
+ if (auto *BO = dyn_cast(FrontU); + BO && BO->isIntDivRem()) + return false; + Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0)); + Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1)); + continue; + } else if (isa(FrontU)) { + Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0)); + continue; + } else if (isa(FrontU)) { + Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0)); + Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1)); + Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2)); + continue; + } else if (auto *II = dyn_cast(FrontU); + II && isTriviallyVectorizable(II->getIntrinsicID())) { + for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) { + if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) { + if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) { + Value *FrontV = Item.front().first->get(); + Use *U = IL.first; + return !U || (cast(U->get())->getOperand(Op) == + cast(FrontV)->getOperand(Op)); + })) + return false; + continue; + } + Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op)); } - Worklist.push_back(generateInstLaneVectorFromOperand(Item, Op)); + continue; } - } else { - return false; } + + if (isFreeConcat(Item, TTI)) { + ConcatLeafs.insert(FrontU); + continue; + } + + return false; } if (NumVisited <= 1) @@ -1901,7 +1960,9 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { // If we got this far, we know the shuffles are superfluous and can be // removed. Scan through again and generate the new tree of instructions. 
- Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs, Builder); + Builder.SetInsertPoint(&I); + Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs, + ConcatLeafs, Builder); replaceValue(I, *V); return true; } diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll index c085e10..3ee8ba5 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleavevectorization.ll @@ -22,9 +22,9 @@ define void @add4(ptr noalias noundef %x, ptr noalias noundef %y, i32 noundef %n ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC24:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2 -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] ; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP2]] +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[WIDE_VEC24]], [[WIDE_VEC]] ; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 @@ -403,12 +403,12 @@ define void @addmul(ptr noalias noundef %x, ptr noundef %y, ptr noundef %z, i32 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i16>, ptr [[TMP0]], align 2 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[Z:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC31:%.*]] = load <32 x i16>, ptr [[TMP1]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP3]], align 2 -; 
CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP2]], [[WIDE_VEC36]] -; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[X]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <32 x i16>, ptr [[TMP2]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[INVARIANT_GEP]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = mul <32 x i16> [[WIDE_VEC31]], [[WIDE_VEC]] +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = add <32 x i16> [[TMP4]], [[WIDE_VEC36]] ; CHECK-NEXT: store <32 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll index d725329..7aba1bb 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity-concat.ll @@ -82,15 +82,9 @@ define <8 x i8> @concata_addmul_small(<4 x i8> %a1, <4 x i8> %a2, <8 x i8> %b, < define <8 x i32> @concata_addmul_big(<4 x i32> %a1, <4 x i32> %a2, <8 x i32> %b, <8 x i32> %c) { ; CHECK-LABEL: @concata_addmul_big( -; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x i32> [[C:%.*]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x i32> [[C]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[XB:%.*]] = mul <4 x i32> [[A1:%.*]], [[BB]] -; CHECK-NEXT: [[XT:%.*]] = mul <4 x i32> [[A2:%.*]], [[BT]] -; CHECK-NEXT: [[YB:%.*]] = add <4 x i32> [[XB]], [[CB]] -; 
CHECK-NEXT: [[YT:%.*]] = add <4 x i32> [[XT]], [[CT]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[YB]], <4 x i32> [[YT]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[R:%.*]] = add <8 x i32> [[TMP2]], [[C:%.*]] ; CHECK-NEXT: ret <8 x i32> [[R]] ; %bb = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> @@ -107,29 +101,11 @@ define <8 x i32> @concata_addmul_big(<4 x i32> %a1, <4 x i32> %a2, <8 x i32> %b, define <16 x i32> @concata_addmul_bigger(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) { ; CHECK-LABEL: @concata_addmul_bigger( -; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]] -; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]] -; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], 
[[B3]] -; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]] -; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]] -; CHECK-NEXT: [[Y2:%.*]] = add <4 x i32> [[X2]], [[C2]] -; CHECK-NEXT: [[Y3:%.*]] = add <4 x i32> [[X3]], [[C3]] -; CHECK-NEXT: [[Y4:%.*]] = add <4 x i32> [[X4]], [[C4]] -; CHECK-NEXT: [[CC1:%.*]] = shufflevector <4 x i32> [[Y1]], <4 x i32> [[Y2]], <8 x i32> -; CHECK-NEXT: [[CC2:%.*]] = shufflevector <4 x i32> [[Y3]], <4 x i32> [[Y4]], <8 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[CC1]], <8 x i32> [[CC2]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> [[A3A:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> [[A1A:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[B:%.*]] +; CHECK-NEXT: [[R:%.*]] = add <16 x i32> [[TMP4]], [[C:%.*]] ; CHECK-NEXT: ret <16 x i32> [[R]] ; %a1 = shufflevector <4 x i32> %a1a, <4 x i32> poison, <4 x i32> @@ -213,34 +189,13 @@ define <16 x i32> @concata_addmul_bigger_undef(<4 x i32> %a1a, <4 x i32> %a2a, < define <16 x i32> @splat_concat(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) { ; CHECK-LABEL: @splat_concat( -; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> -; 
CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[SPLATA:%.*]] = shufflevector <4 x i32> [[A4A]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]] -; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]] -; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], [[B3]] -; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]] -; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]] -; CHECK-NEXT: [[Y2:%.*]] = add <4 x i32> [[X2]], [[C2]] -; CHECK-NEXT: [[Y3:%.*]] = add <4 x i32> [[X3]], [[C3]] -; CHECK-NEXT: [[Y4:%.*]] = add <4 x i32> [[X4]], [[C4]] -; CHECK-NEXT: [[Z1:%.*]] = xor <4 x i32> [[Y1]], [[SPLATA]] -; CHECK-NEXT: [[Z2:%.*]] = xor <4 x i32> [[Y2]], [[SPLATA]] -; CHECK-NEXT: [[Z3:%.*]] = xor <4 x i32> [[Y3]], [[SPLATA]] -; CHECK-NEXT: [[Z4:%.*]] = xor <4 x i32> [[Y4]], [[SPLATA]] -; CHECK-NEXT: [[CC1:%.*]] = shufflevector <4 x i32> [[Z1]], <4 x i32> [[Z2]], <8 x i32> -; CHECK-NEXT: [[CC2:%.*]] = shufflevector <4 x i32> [[Z3]], <4 x i32> [[Z4]], <8 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[CC1]], <8 x i32> [[CC2]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> [[A3A:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> [[A1A:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[TMP4]], [[C:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x 
i32> [[A4A]], <4 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = xor <16 x i32> [[TMP5]], [[TMP6]] ; CHECK-NEXT: ret <16 x i32> [[R]] ; %a1 = shufflevector <4 x i32> %a1a, <4 x i32> poison, <4 x i32> @@ -276,33 +231,15 @@ define <16 x i32> @splat_concat(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, define <16 x i32> @two_concats(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, <4 x i32> %a4a, <16 x i32> %b, <16 x i32> %c) { ; CHECK-LABEL: @two_concats( -; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A1A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A3:%.*]] = shufflevector <4 x i32> [[A3A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A4:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B1:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B2:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B3:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[B4:%.*]] = shufflevector <16 x i32> [[B]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C1:%.*]] = shufflevector <16 x i32> [[C:%.*]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C2:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C3:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[C4:%.*]] = shufflevector <16 x i32> [[C]], <16 x i32> poison, <4 x i32> -; CHECK-NEXT: [[X1:%.*]] = mul <4 x i32> [[A1]], [[B1]] -; CHECK-NEXT: [[X2:%.*]] = mul <4 x i32> [[A2]], [[B2]] -; CHECK-NEXT: [[X3:%.*]] = mul <4 x i32> [[A3]], [[B3]] -; CHECK-NEXT: [[X4:%.*]] = mul <4 x i32> [[A4]], [[B4]] -; CHECK-NEXT: [[Y1:%.*]] = add <4 x i32> [[X1]], [[C1]] -; CHECK-NEXT: [[Y2:%.*]] = add <4 x i32> [[X2]], [[C2]] -; CHECK-NEXT: [[Y3:%.*]] = add <4 x i32> [[X3]], [[C3]] -; CHECK-NEXT: [[Y4:%.*]] = 
add <4 x i32> [[X4]], [[C4]] -; CHECK-NEXT: [[Z1:%.*]] = xor <4 x i32> [[Y1]], [[A1]] -; CHECK-NEXT: [[Z2:%.*]] = xor <4 x i32> [[Y2]], [[A1]] -; CHECK-NEXT: [[Z3:%.*]] = xor <4 x i32> [[Y3]], [[A1]] -; CHECK-NEXT: [[Z4:%.*]] = xor <4 x i32> [[Y4]], [[A1]] -; CHECK-NEXT: [[CC1:%.*]] = shufflevector <4 x i32> [[Z1]], <4 x i32> [[Z2]], <8 x i32> -; CHECK-NEXT: [[CC2:%.*]] = shufflevector <4 x i32> [[Z3]], <4 x i32> [[Z4]], <8 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[CC1]], <8 x i32> [[CC2]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A4A:%.*]], <4 x i32> [[A3A:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[A2A:%.*]], <4 x i32> [[A1A:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[TMP3]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[TMP4]], [[C:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[A1A]], <4 x i32> [[A1A]], <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[A1A]], <4 x i32> [[A1A]], <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[R:%.*]] = xor <16 x i32> [[TMP5]], [[TMP8]] ; CHECK-NEXT: ret <16 x i32> [[R]] ; %a1 = shufflevector <4 x i32> %a1a, <4 x i32> poison, <4 x i32> @@ -338,57 +275,15 @@ define <16 x i32> @two_concats(<4 x i32> %a1a, <4 x i32> %a2a, <4 x i32> %a3a, < define <16 x double> @konkat(<16 x double> %wide.vec, <16 x double> %wide.vec115, <2 x double> %l27, <2 x double> %l28, <2 x double> %l29, <2 x double> %l30) { ; CHECK-LABEL: @konkat( -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[L27:%.*]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[L32:%.*]] = shufflevector <2 x double> [[L27]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[BROADCAST_SPLAT102:%.*]] = shufflevector <2 x double> [[L32]], <2 x double> 
poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLAT104:%.*]] = shufflevector <2 x double> [[L28:%.*]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[L33:%.*]] = shufflevector <2 x double> [[L28]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[BROADCAST_SPLAT106:%.*]] = shufflevector <2 x double> [[L33]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLAT108:%.*]] = shufflevector <2 x double> [[L29:%.*]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[L34:%.*]] = shufflevector <2 x double> [[L29]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[BROADCAST_SPLAT110:%.*]] = shufflevector <2 x double> [[L34]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLAT112:%.*]] = shufflevector <2 x double> [[L30:%.*]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[L35:%.*]] = shufflevector <2 x double> [[L30]], <2 x double> poison, <2 x i32> -; CHECK-NEXT: [[BROADCAST_SPLAT114:%.*]] = shufflevector <2 x double> [[L35]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC:%.*]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC94:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC95:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC96:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC97:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC98:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC99:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC100:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x 
i32> -; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC94]], [[BROADCAST_SPLAT102]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC95]], [[BROADCAST_SPLAT104]] -; CHECK-NEXT: [[TMP4:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC96]], [[BROADCAST_SPLAT106]] -; CHECK-NEXT: [[TMP5:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC97]], [[BROADCAST_SPLAT108]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC98]], [[BROADCAST_SPLAT110]] -; CHECK-NEXT: [[TMP7:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC99]], [[BROADCAST_SPLAT112]] -; CHECK-NEXT: [[TMP8:%.*]] = fmul reassoc nsz contract <2 x double> [[STRIDED_VEC100]], [[BROADCAST_SPLAT114]] -; CHECK-NEXT: [[STRIDED_VEC116:%.*]] = shufflevector <16 x double> [[WIDE_VEC115:%.*]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC117:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC118:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC119:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC120:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC121:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC122:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[STRIDED_VEC123:%.*]] = shufflevector <16 x double> [[WIDE_VEC115]], <16 x double> poison, <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC116]], [[TMP1]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd reassoc nsz contract <2 x double> 
[[STRIDED_VEC117]], [[TMP2]] -; CHECK-NEXT: [[TMP11:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC118]], [[TMP3]] -; CHECK-NEXT: [[TMP12:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC119]], [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC120]], [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC121]], [[TMP6]] -; CHECK-NEXT: [[TMP15:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC122]], [[TMP7]] -; CHECK-NEXT: [[TMP16:%.*]] = fadd reassoc nsz contract <2 x double> [[STRIDED_VEC123]], [[TMP8]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> [[TMP10]], <4 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <4 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[TMP13]], <2 x double> [[TMP14]], <4 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> [[TMP16]], <4 x i32> -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x double> [[TMP17]], <4 x double> [[TMP18]], <8 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x double> [[TMP19]], <4 x double> [[TMP20]], <8 x i32> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> [[TMP22]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[L27:%.*]], <2 x double> [[L28:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[L29:%.*]], <2 x double> [[L30:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[L27]], <2 x double> [[L28]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[L29]], <2 x double> [[L30]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP4]], <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = 
shufflevector <8 x double> [[TMP5]], <8 x double> [[TMP6]], <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = fmul reassoc nsz contract <16 x double> [[WIDE_VEC:%.*]], [[TMP7]] +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd reassoc nsz contract <16 x double> [[WIDE_VEC115:%.*]], [[TMP8]] ; CHECK-NEXT: ret <16 x double> [[INTERLEAVED_VEC]] ; %broadcast.splat = shufflevector <2 x double> %l27, <2 x double> poison, <2 x i32> zeroinitializer diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll index 9ad042c..e6899d1 100644 --- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -262,9 +262,9 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) { define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: @splattwice( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <8 x half> [[R]] ; %as = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> zeroinitializer @@ -352,9 +352,9 @@ define <8 x half> @constantsplatf(<8 x half> %a) { define <8 x i8> @inner_shuffle(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { ; CHECK-LABEL: @inner_shuffle( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[R:%.*]] = add <8 x i8> 
[[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = mul <8 x i8> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[R:%.*]] = add <8 x i8> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <8 x i8> [[R]] ; %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> @@ -839,16 +839,16 @@ define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) { ; CHECK-LABEL: @v8f64interleave( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <16 x double> [[WIDE_VEC]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]] -; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x double> [[WIDE_VEC34]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP0]], 7 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -56 -; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]] +; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 7 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 -56 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = fmul 
fast <16 x double> [[WIDE_VEC]], [[TMP6]] +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x double> [[WIDE_VEC34]], [[TMP7]] +; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -905,10 +905,10 @@ entry: define <4 x i8> @singleop(<4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: @singleop( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[A:%.*]] to <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[B:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[R:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8> ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -953,9 +953,9 @@ define <4 x float> @fadd_mismatched_types(<4 x float> %x, <4 x float> %y) { define void @maximal_legal_fpmath(ptr %addr1, ptr %addr2, ptr %result, float %val) { ; CHECK-LABEL: @maximal_legal_fpmath( ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[VAL:%.*]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[SPLATINSERT]], <4 x float> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[VEC1:%.*]] = load <16 x float>, ptr [[ADDR1:%.*]], align 4 ; CHECK-NEXT: [[VEC2:%.*]] = load <16 x float>, ptr [[ADDR2:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[SPLATINSERT]], <4 x float> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <16 x float> [[TMP1]], [[VEC2]] ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd reassoc contract <16 x float> [[VEC1]], [[TMP2]] ; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr 
[[RESULT:%.*]], align 4 -- cgit v1.1