aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2022-11-30 14:48:32 -0500
committerSanjay Patel <spatel@rotateright.com>2022-11-30 14:52:20 -0500
commite71b81cab09bf33e3b08ed600418b72cc4117461 (patch)
treee932e9e69bcb5a4201c42463a283a9e245fa590a /llvm
parentd5b0de35bdd9a3f4d4a093e7938b06add34678eb (diff)
downloadllvm-e71b81cab09bf33e3b08ed600418b72cc4117461.zip
llvm-e71b81cab09bf33e3b08ed600418b72cc4117461.tar.gz
llvm-e71b81cab09bf33e3b08ed600418b72cc4117461.tar.bz2
[InstCombine] canonicalize trunc + insert as bitcast + shuffle, part 1 (2nd try)
The first attempt was reverted because a clang test changed unexpectedly - the file is already marked with a FIXME, so I just updated it this time to pass. Original commit message: This is the main patch for converting a truncated scalar that is inserted into a vector to bitcast+shuffle. We could go either way on patterns like this, but this direction will allow collapsing a pair of these sequences on the motivating example from the associated issue. The patch is split into 3 parts to make it easier to see the progression of test diffs. We allow inserting/shuffling into a different size vector for flexibility, so there are several test variations. The length-changing is handled by shortening/padding the shuffle mask with undef elements. In part 1, handle the basic pattern: inselt undef, (trunc T), IndexC --> shuffle (bitcast T), IdentityMask. Proof for the endian-dependency behaving as expected: https://alive2.llvm.org/ce/z/BsA7yC The TODO items for handling shifts and insert into an arbitrary base vector value are implemented as follow-ups. Differential Revision: https://reviews.llvm.org/D138872
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp64
-rw-r--r--llvm/test/Transforms/InstCombine/insert-trunc.ll117
-rw-r--r--llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll23
-rw-r--r--llvm/test/Transforms/InstCombine/vec_phi_extract.ll23
-rw-r--r--llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll19
-rw-r--r--llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll36
6 files changed, 193 insertions, 89 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 7d613f2..3320bf5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -39,6 +39,7 @@
#include <cassert>
#include <cstdint>
#include <iterator>
+#include <numeric>
#include <utility>
#define DEBUG_TYPE "instcombine"
@@ -1514,6 +1515,66 @@ static Instruction *narrowInsElt(InsertElementInst &InsElt,
return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
}
+/// Try to convert scalar extraction ops (shift+trunc) with insertelt to
+/// bitcast and shuffle (only the no-shift, undef-base form is handled so far):
+/// inselt V, (trunc (lshr X)), IndexC --> shuffle (bitcast X), V, Mask
+static Instruction *foldTruncInsElt(InsertElementInst &InsElt, bool IsBigEndian,
+ InstCombiner::BuilderTy &Builder) {
+ // Match the basic pattern: inselt undef, (trunc T), IndexC
+ // TODO: Allow any base vector value.
+ // TODO: The one-use limitation could be removed for some cases (e.g., no
+ // extra shuffle is needed and a shift is eliminated).
+ auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
+ Value *T, *V = InsElt.getOperand(0);
+ uint64_t IndexC;
+ if (!VTy || !match(InsElt.getOperand(1), m_OneUse(m_Trunc(m_Value(T)))) ||
+ !match(InsElt.getOperand(2), m_ConstantInt(IndexC)) ||
+ !match(V, m_Undef()))
+ return nullptr;
+
+ Type *SrcTy = T->getType();
+ unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
+ unsigned VecEltWidth = VTy->getScalarSizeInBits();
+ if (ScalarWidth % VecEltWidth != 0) // scalar must split into whole elements
+ return nullptr;
+
+ unsigned NumEltsInScalar = ScalarWidth / VecEltWidth;
+ Value *X = T;
+ if ((IsBigEndian && IndexC == NumEltsInScalar - 1) ||
+ (!IsBigEndian && IndexC == 0)) {
+ // The insert index is the lane that holds the low bits of the bitcast
+ // scalar (last lane for big-endian, lane 0 otherwise), so X is used as-is.
+ } else {
+ // TODO: Look through a shift-right and translate the insert index.
+ return nullptr;
+ }
+
+ // Bitcast the scalar to a vector type with the destination element type.
+ Type *CastTy = FixedVectorType::get(VTy->getElementType(), NumEltsInScalar);
+ Value *VecX = Builder.CreateBitCast(X, CastTy, "vec." + X->getName());
+
+ unsigned NumElts = VTy->getNumElements();
+ if (NumElts > NumEltsInScalar) {
+ // Pad the source vector with undef elements, so it matches the dest type.
+ SmallVector<int> IdentityPaddedMask(NumElts, UndefMaskElem);
+ for (unsigned i = 0; i != NumEltsInScalar; ++i)
+ IdentityPaddedMask[i] = i;
+ VecX = Builder.CreateShuffleVector(VecX, IdentityPaddedMask);
+ } else if (NumElts < NumEltsInScalar) {
+ // Narrow the source vector, so it matches the dest type.
+ SmallVector<int> IdentityExtractMask(NumElts);
+ std::iota(IdentityExtractMask.begin(), IdentityExtractMask.end(), 0);
+ VecX = Builder.CreateShuffleVector(VecX, IdentityExtractMask);
+ }
+
+ // Insert the truncated element using a select-shuffle: all lanes but IndexC
+ // come from V. NOTE(review): assumes IndexC < NumElts (unchecked) - confirm.
+ SmallVector<int> SelectMask(NumElts);
+ std::iota(SelectMask.begin(), SelectMask.end(), 0);
+ SelectMask[IndexC] = (int)IndexC + NumElts; // lane IndexC of 2nd operand, VecX
+ return new ShuffleVectorInst(V, VecX, SelectMask);
+}
+
Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
@@ -1641,6 +1702,9 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
if (Instruction *Ext = narrowInsElt(IE, Builder))
return Ext;
+ if (Instruction *Shuf = foldTruncInsElt(IE, DL.isBigEndian(), Builder))
+ return Shuf;
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/insert-trunc.ll b/llvm/test/Transforms/InstCombine/insert-trunc.ll
index 3ae128e..20922f9 100644
--- a/llvm/test/Transforms/InstCombine/insert-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/insert-trunc.ll
@@ -1,15 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL
-; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL
+; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL,BE
+; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL,LE
declare void @use(i8)
declare void @use64(i64)
define <4 x i16> @low_index_same_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @low_index_same_length_poison_basevec(
-; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0
-; ALL-NEXT: ret <4 x i16> [[R]]
+; BE-LABEL: @low_index_same_length_poison_basevec(
+; BE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; BE-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0
+; BE-NEXT: ret <4 x i16> [[R]]
+;
+; LE-LABEL: @low_index_same_length_poison_basevec(
+; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; LE-NEXT: ret <4 x i16> [[R]]
;
%t = trunc i64 %x to i16
%r = insertelement <4 x i16> poison, i16 %t, i64 0
@@ -17,10 +22,15 @@ define <4 x i16> @low_index_same_length_poison_basevec(i64 %x) {
}
define <4 x i16> @high_index_same_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @high_index_same_length_poison_basevec(
-; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 3
-; ALL-NEXT: ret <4 x i16> [[R]]
+; BE-LABEL: @high_index_same_length_poison_basevec(
+; BE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; BE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 3>
+; BE-NEXT: ret <4 x i16> [[R]]
+;
+; LE-LABEL: @high_index_same_length_poison_basevec(
+; LE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; LE-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 3
+; LE-NEXT: ret <4 x i16> [[R]]
;
%t = trunc i64 %x to i16
%r = insertelement <4 x i16> poison, i16 %t, i64 3
@@ -39,10 +49,15 @@ define <4 x i16> @wrong_index_same_length_poison_basevec(i64 %x) {
}
define <8 x i16> @low_index_longer_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @low_index_longer_length_poison_basevec(
-; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 0
-; ALL-NEXT: ret <8 x i16> [[R]]
+; BE-LABEL: @low_index_longer_length_poison_basevec(
+; BE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; BE-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 0
+; BE-NEXT: ret <8 x i16> [[R]]
+;
+; LE-LABEL: @low_index_longer_length_poison_basevec(
+; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; LE-NEXT: ret <8 x i16> [[R]]
;
%t = trunc i64 %x to i16
%r = insertelement <8 x i16> poison, i16 %t, i64 0
@@ -50,10 +65,15 @@ define <8 x i16> @low_index_longer_length_poison_basevec(i64 %x) {
}
define <8 x i16> @high_index_longer_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @high_index_longer_length_poison_basevec(
-; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3
-; ALL-NEXT: ret <8 x i16> [[R]]
+; BE-LABEL: @high_index_longer_length_poison_basevec(
+; BE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; BE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; BE-NEXT: ret <8 x i16> [[R]]
+;
+; LE-LABEL: @high_index_longer_length_poison_basevec(
+; LE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; LE-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3
+; LE-NEXT: ret <8 x i16> [[R]]
;
%t = trunc i64 %x to i16
%r = insertelement <8 x i16> poison, i16 %t, i64 3
@@ -72,10 +92,15 @@ define <8 x i16> @wrong_index_longer_length_poison_basevec(i64 %x) {
}
define <2 x i16> @low_index_shorter_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @low_index_shorter_length_poison_basevec(
-; ALL-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
-; ALL-NEXT: [[R:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i64 0
-; ALL-NEXT: ret <2 x i16> [[R]]
+; BE-LABEL: @low_index_shorter_length_poison_basevec(
+; BE-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i16
+; BE-NEXT: [[R:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i64 0
+; BE-NEXT: ret <2 x i16> [[R]]
+;
+; LE-LABEL: @low_index_shorter_length_poison_basevec(
+; LE-NEXT: [[VEC_X:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
+; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_X]], <4 x i16> poison, <2 x i32> <i32 0, i32 undef>
+; LE-NEXT: ret <2 x i16> [[R]]
;
%t = trunc i64 %x to i16
%r = insertelement <2 x i16> poison, i16 %t, i64 0
@@ -144,11 +169,17 @@ define <4 x i16> @lshr_same_length_poison_basevec_be(i64 %x) {
}
define <4 x i16> @lshr_same_length_poison_basevec_both_endian(i64 %x) {
-; ALL-LABEL: @lshr_same_length_poison_basevec_both_endian(
-; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48
-; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16
-; ALL-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0
-; ALL-NEXT: ret <4 x i16> [[R]]
+; BE-LABEL: @lshr_same_length_poison_basevec_both_endian(
+; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48
+; BE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16
+; BE-NEXT: [[R:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i64 0
+; BE-NEXT: ret <4 x i16> [[R]]
+;
+; LE-LABEL: @lshr_same_length_poison_basevec_both_endian(
+; LE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48
+; LE-NEXT: [[VEC_S:%.*]] = bitcast i64 [[S]] to <4 x i16>
+; LE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_S]], <4 x i16> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; LE-NEXT: ret <4 x i16> [[R]]
;
%s = lshr i64 %x, 48
%t = trunc i64 %s to i16
@@ -170,11 +201,17 @@ define <4 x i16> @lshr_wrong_index_same_length_poison_basevec(i64 %x) {
}
define <8 x i16> @lshr_longer_length_poison_basevec_le(i64 %x) {
-; ALL-LABEL: @lshr_longer_length_poison_basevec_le(
-; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48
-; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16
-; ALL-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3
-; ALL-NEXT: ret <8 x i16> [[R]]
+; BE-LABEL: @lshr_longer_length_poison_basevec_le(
+; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48
+; BE-NEXT: [[VEC_S:%.*]] = bitcast i64 [[S]] to <4 x i16>
+; BE-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[VEC_S]], <4 x i16> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; BE-NEXT: ret <8 x i16> [[R]]
+;
+; LE-LABEL: @lshr_longer_length_poison_basevec_le(
+; LE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 48
+; LE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i16
+; LE-NEXT: [[R:%.*]] = insertelement <8 x i16> poison, i16 [[T]], i64 3
+; LE-NEXT: ret <8 x i16> [[R]]
;
%s = lshr i64 %x, 48
%t = trunc i64 %s to i16
@@ -248,11 +285,17 @@ define <4 x i8> @lshr_wrong_index_shorter_length_poison_basevec(i64 %x) {
}
define <4 x i8> @lshr_wrong_shift_shorter_length_poison_basevec(i64 %x) {
-; ALL-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec(
-; ALL-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 57
-; ALL-NEXT: [[T:%.*]] = trunc i64 [[S]] to i8
-; ALL-NEXT: [[R:%.*]] = insertelement <4 x i8> poison, i8 [[T]], i64 0
-; ALL-NEXT: ret <4 x i8> [[R]]
+; BE-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec(
+; BE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 57
+; BE-NEXT: [[T:%.*]] = trunc i64 [[S]] to i8
+; BE-NEXT: [[R:%.*]] = insertelement <4 x i8> poison, i8 [[T]], i64 0
+; BE-NEXT: ret <4 x i8> [[R]]
+;
+; LE-LABEL: @lshr_wrong_shift_shorter_length_poison_basevec(
+; LE-NEXT: [[S:%.*]] = lshr i64 [[X:%.*]], 57
+; LE-NEXT: [[VEC_S:%.*]] = bitcast i64 [[S]] to <8 x i8>
+; LE-NEXT: [[R:%.*]] = shufflevector <8 x i8> [[VEC_S]], <8 x i8> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; LE-NEXT: ret <4 x i8> [[R]]
;
%s = lshr i64 %x, 57
%t = trunc i64 %s to i8
diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
index 79c3d37..2fd7b4b 100644
--- a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
@@ -83,21 +83,20 @@ ret:
define void @nocopy(i64 %val, i32 %limit, ptr %ptr) {
; CHECK-LABEL: @nocopy(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[VAL:%.*]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> undef, i32 [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[TMP2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[VEC_VAL:%.*]] = bitcast i64 [[VAL:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VEC_VAL]], <2 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[TMP0]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP4]], i64 0
-; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = phi <16 x i32> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp ult i32 [[ELT]], [[LIMIT:%.*]]
-; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[ELTCOPY]], 10
-; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]]
-; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[ELTCOPY]], 10
+; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[ELT]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]]
+; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4
+; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]]
; CHECK: ret:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
index 1bdc217..76ba2eb 100644
--- a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
+++ b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -83,21 +83,20 @@ ret:
define void @nocopy(i64 %val, i32 %limit, ptr %ptr) {
; CHECK-LABEL: @nocopy(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[VAL:%.*]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> undef, i32 [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[TMP2]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[VEC_VAL:%.*]] = bitcast i64 [[VAL:%.*]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VEC_VAL]], <2 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i32> [[TMP0]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP4]], i64 0
-; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP2:%.*]] = phi <16 x i32> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[ELT:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[ELTCOPY:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp ult i32 [[ELT]], [[LIMIT:%.*]]
-; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[ELTCOPY]], 10
-; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[ELT]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]]
-; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP4]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[ELTCOPY]], 10
+; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[ELT]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP4]]
+; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4
+; CHECK-NEXT: [[INC]] = add <16 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: br i1 [[END]], label [[LOOP]], label [[RET:%.*]]
; CHECK: ret:
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
index 99e5bee..699d64a 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
@@ -155,22 +155,21 @@ end:
define hidden void @pointer_phi_v8i16_add1(i16* noalias nocapture readonly %A, i16* noalias nocapture %B, i32 %y) {
; CHECK-LABEL: @pointer_phi_v8i16_add1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[Y:%.*]] to i16
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_Y:%.*]] = bitcast i32 [[Y:%.*]] to <2 x i16>
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[VEC_Y]], <2 x i16> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i16, i16* [[B:%.*]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[NEXT_GEP]] to <8 x i16>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[NEXT_GEP4]] to <8 x i16>*
+; CHECK-NEXT: store <8 x i16> [[TMP1]], <8 x i16>* [[TMP2]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP4]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP3]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: end:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
index 1ded4db..63b90ac 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
@@ -50,15 +50,15 @@ define noundef <4 x float> @ConvertVectors_ByVal(ptr noundef nonnull align 16 de
; SSE-NEXT: [[V_VAL20:%.*]] = load i64, ptr [[V:%.*]], align 16
; SSE-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8
; SSE-NEXT: [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8
-; SSE-NEXT: [[TMP1:%.*]] = lshr i64 [[V_VAL20]], 32
-; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[V_VAL20]], i64 0
-; SSE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP1]], i64 1
-; SSE-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32>
-; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; SSE-NEXT: [[TMP6:%.*]] = trunc i64 [[V_VAL421]] to i32
-; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 2
-; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP6]], i64 3
-; SSE-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+; SSE-NEXT: [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32>
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; SSE-NEXT: [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32
+; SSE-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1
+; SSE-NEXT: [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32
+; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2
+; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3
+; SSE-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float>
; SSE-NEXT: ret <4 x float> [[VECINIT16]]
;
; AVX-LABEL: @ConvertVectors_ByVal(
@@ -66,15 +66,15 @@ define noundef <4 x float> @ConvertVectors_ByVal(ptr noundef nonnull align 16 de
; AVX-NEXT: [[V_VAL20:%.*]] = load i64, ptr [[V:%.*]], align 16
; AVX-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[V]], i64 8
; AVX-NEXT: [[V_VAL421:%.*]] = load i64, ptr [[TMP0]], align 8
-; AVX-NEXT: [[TMP1:%.*]] = trunc i64 [[V_VAL20]] to i32
-; AVX-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i64 0
-; AVX-NEXT: [[TMP3:%.*]] = lshr i64 [[V_VAL20]], 32
-; AVX-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i64 1
-; AVX-NEXT: [[TMP6:%.*]] = trunc i64 [[V_VAL421]] to i32
-; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i64 2
-; AVX-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP6]], i64 3
-; AVX-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
+; AVX-NEXT: [[VEC_V_VAL20:%.*]] = bitcast i64 [[V_VAL20]] to <2 x i32>
+; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_V_VAL20]], <2 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; AVX-NEXT: [[TMP2:%.*]] = lshr i64 [[V_VAL20]], 32
+; AVX-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; AVX-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP3]], i64 1
+; AVX-NEXT: [[TMP5:%.*]] = trunc i64 [[V_VAL421]] to i32
+; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i64 2
+; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP5]], i64 3
+; AVX-NEXT: [[VECINIT16:%.*]] = bitcast <4 x i32> [[TMP7]] to <4 x float>
; AVX-NEXT: ret <4 x float> [[VECINIT16]]
;
entry: