aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll659
1 files changed, 659 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll
new file mode 100644
index 0000000..23bc21a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll
@@ -0,0 +1,659 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 6
+; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2 %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx"
+
+define void @test_2xi64_mul_add(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mul_add(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], splat (i64 2)
+; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, %l.0
+ %add.0 = add i64 %mul.0, 2
+ store i64 %add.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %add.1 = add i64 %mul.1, 2
+ store i64 %add.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mixed_opcodes1(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mixed_opcodes1(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP6:%.*]] = xor <2 x i64> [[TMP5]], splat (i64 2)
+; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, %l.0
+ %add.0 = add i64 %mul.0, 2
+ store i64 %add.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %add.1 = xor i64 %mul.1, 2
+ store i64 %add.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mixed_opcodes2(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mixed_opcodes2(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], splat (i64 2)
+; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = xor i64 %l.factor, %l.0
+ %add.0 = add i64 %mul.0, 2
+ store i64 %add.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %add.1 = add i64 %mul.1, 2
+ store i64 %add.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mul_sub(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mul_sub(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
+; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 2)
+; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, %l.0
+ %sub.0 = sub i64 %mul.0, 2
+ store i64 %sub.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %sub.1 = sub i64 %mul.1, 2
+ store i64 %sub.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mul_sub_mismatched_ops1(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mul_sub_mismatched_ops1(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
+; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 3)
+; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, %l.0
+ %sub.0 = sub i64 %mul.0, 2
+ store i64 %sub.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %sub.1 = sub i64 %mul.1, 3
+ store i64 %sub.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mul_sub_mismatched_ops2(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mul_sub_mismatched_ops2(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], splat (i64 3)
+; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 2)
+; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, 3
+ %sub.0 = sub i64 %mul.0, 2
+ store i64 %sub.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %sub.1 = sub i64 %mul.1, 2
+ store i64 %sub.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mul_sub_mismatched_op_order(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mul_sub_mismatched_op_order(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
+; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> splat (i64 2), [[TMP5]]
+; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, %l.0
+ %sub.0 = sub i64 %mul.0, 2
+ store i64 %sub.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %sub.1 = sub i64 2, %mul.1
+ store i64 %sub.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mul_add_xor(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mul_add_xor(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]]
+; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2)
+; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, %l.0
+ %add.0 = add i64 %mul.0, 2
+ %xor.0 = xor i64 4, %add.0
+ store i64 %xor.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %add.1 = add i64 %mul.1, 2
+ %xor.1 = xor i64 4, %add.1
+ store i64 %xor.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mul_add_xor_mismatched_opcodes1(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mul_add_xor_mismatched_opcodes1(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]]
+; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2)
+; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, %l.0
+ %add.0 = add i64 %mul.0, 2
+ %xor.0 = xor i64 4, %add.0
+ store i64 %xor.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = sub i64 %l.factor, %l.1
+ %add.1 = add i64 %mul.1, 2
+ %xor.1 = xor i64 4, %add.1
+ store i64 %xor.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mul_add_xor_mismatched_opcodes2(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mul_add_xor_mismatched_opcodes2(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]]
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]]
+; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP6]], splat (i64 2)
+; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, %l.0
+ %add.0 = add i64 %mul.0, 2
+ %xor.0 = xor i64 4, %add.0
+ store i64 %xor.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %add.1 = mul i64 %mul.1, 2
+ %xor.1 = xor i64 4, %add.1
+ store i64 %xor.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_2xi64_mul_add_xor_mismatched_ops(ptr noalias %data, ptr noalias %factor) {
+; VF2-LABEL: define void @test_2xi64_mul_add_xor_mismatched_ops(
+; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) {
+; VF2-NEXT: [[ENTRY:.*:]]
+; VF2-NEXT: br label %[[VECTOR_PH:.*]]
+; VF2: [[VECTOR_PH]]:
+; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF2: [[VECTOR_BODY]]:
+; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
+; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
+; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1
+; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]]
+; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
+; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>
+; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], splat (i64 3)
+; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2)
+; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]]
+; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
+; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2)
+; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]]
+; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
+; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; VF2: [[MIDDLE_BLOCK]]:
+; VF2-NEXT: br label %[[EXIT:.*]]
+; VF2: [[EXIT]]:
+; VF2-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv
+ %l.factor = load i64, ptr %arrayidx, align 8
+ %idx.0 = shl nsw i64 %iv, 1
+ %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0
+ %l.0 = load i64, ptr %data.0, align 8
+ %mul.0 = mul i64 %l.factor, 3
+ %add.0 = add i64 %mul.0, 2
+ %xor.0 = xor i64 4, %add.0
+ store i64 %xor.0, ptr %data.0, align 8
+ %idx.1 = or disjoint i64 %idx.0, 1
+ %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1
+ %l.1 = load i64, ptr %data.1, align 8
+ %mul.1 = mul i64 %l.factor, %l.1
+ %add.1 = add i64 %mul.1, 2
+ %xor.1 = xor i64 4, %add.1
+ store i64 %xor.1, ptr %data.1, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 100
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}