diff options
| -rw-r--r-- | llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll | 659 |
1 files changed, 659 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll new file mode 100644 index 0000000..23bc21a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops-chained.ll @@ -0,0 +1,659 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF2 %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx" + +define void @test_2xi64_mul_add(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_add( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x 
i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %add.0 = add i64 %mul.0, 2 + store i64 %add.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = add i64 %mul.1, 2 ; same mul/add chain as %add.0 + store i64 %add.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mixed_opcodes1(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mixed_opcodes1( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; 
VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = xor <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %add.0 = 
add i64 %mul.0, 2 + store i64 %add.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = xor i64 %mul.1, 2 ; xor here, whereas %add.0 is an add + store i64 %add.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mixed_opcodes2(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mixed_opcodes2( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = xor <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> 
<i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = xor i64 %l.factor, %l.0 ; xor here, whereas %mul.1 is a mul + %add.0 = add i64 %mul.0, 2 + store i64 %add.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = add i64 %mul.1, 2 + store i64 %add.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_sub(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_sub( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: 
[[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %sub.0 = sub i64 %mul.0, 2 + store i64 %sub.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %sub.1 = sub i64 %mul.1, 2 ; same mul/sub chain as %sub.0 + store i64 %sub.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label 
%loop + +exit: + ret void +} + +define void @test_2xi64_mul_sub_mismatched_ops1(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_sub_mismatched_ops1( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 3) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; 
VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %sub.0 = sub i64 %mul.0, 2 + store i64 %sub.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %sub.1 = sub i64 %mul.1, 3 ; subtracts 3, whereas %sub.0 subtracts 2 + store i64 %sub.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_sub_mismatched_ops2(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_sub_mismatched_ops2( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], splat (i64 3) +; 
VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP5]], splat (i64 2) +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, 3 ; constant operand 3 (%l.0 is unused), whereas %mul.1 multiplies by %l.1 + %sub.0 = sub i64 %mul.0, 2 + store i64 %sub.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %sub.1 = sub i64 %mul.1, 2 + store i64 %sub.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_sub_mismatched_op_order(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_sub_mismatched_op_order( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br 
label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = sub <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> splat (i64 2), [[TMP5]] +; VF2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, 
align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %sub.0 = sub i64 %mul.0, 2 + store i64 %sub.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %sub.1 = sub i64 2, %mul.1 ; operand order swapped relative to %sub.0 + store i64 %sub.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_add_xor(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_add_xor( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2) +; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 
x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %add.0 = add i64 %mul.0, 2 + %xor.0 = xor i64 4, %add.0 + store i64 %xor.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = add i64 %mul.1, 2 + %xor.1 = xor i64 4, %add.1 ; same mul/add/xor chain as %xor.0 + store i64 %xor.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_add_xor_mismatched_opcodes1(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_add_xor_mismatched_opcodes1( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] 
= getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2) +; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + 
%add.0 = add i64 %mul.0, 2 + %xor.0 = xor i64 4, %add.0 + store i64 %xor.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = sub i64 %l.factor, %l.1 ; sub here, whereas %mul.0 is a mul + %add.1 = add i64 %mul.1, 2 + %xor.1 = xor i64 4, %add.1 + store i64 %xor.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_add_xor_mismatched_opcodes2(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_add_xor_mismatched_opcodes2( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC]] +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP7:%.*]] = mul <2 x i64> [[TMP6]], splat (i64 2) +; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), 
[[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, align 8 + %mul.0 = mul i64 %l.factor, %l.0 + %add.0 = add i64 %mul.0, 2 + %xor.0 = xor i64 4, %add.0 + store i64 %xor.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = mul i64 %mul.1, 2 ; mul here, whereas %add.0 is an add + %xor.1 = xor i64 4, %add.1 + store i64 %xor.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_2xi64_mul_add_xor_mismatched_ops(ptr noalias %data, ptr noalias %factor) { +; VF2-LABEL: define void @test_2xi64_mul_add_xor_mismatched_ops( +; VF2-SAME: ptr noalias [[DATA:%.*]], ptr noalias [[FACTOR:%.*]]) { +; VF2-NEXT: [[ENTRY:.*:]] +; VF2-NEXT: br label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
%[[VECTOR_BODY]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = shl nsw i64 [[INDEX]], 1 +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] +; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> +; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3> +; VF2-NEXT: [[TMP3:%.*]] = mul <2 x i64> [[WIDE_LOAD]], splat (i64 3) +; VF2-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMP3]], splat (i64 2) +; VF2-NEXT: [[TMP5:%.*]] = xor <2 x i64> splat (i64 4), [[TMP4]] +; VF2-NEXT: [[TMP6:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]] +; VF2-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP6]], splat (i64 2) +; VF2-NEXT: [[TMP8:%.*]] = xor <2 x i64> splat (i64 4), [[TMP7]] +; VF2-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> +; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8 +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: br label %[[EXIT:.*]] +; VF2: [[EXIT]]: +; VF2-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i64, ptr %factor, i64 %iv + %l.factor = load i64, ptr %arrayidx, align 8 + %idx.0 = shl nsw i64 %iv, 1 + %data.0 = getelementptr inbounds i64, ptr %data, i64 %idx.0 + %l.0 = load i64, ptr %data.0, 
align 8 + %mul.0 = mul i64 %l.factor, 3 ; constant operand 3 (%l.0 is unused), whereas %mul.1 multiplies by %l.1 + %add.0 = add i64 %mul.0, 2 + %xor.0 = xor i64 4, %add.0 + store i64 %xor.0, ptr %data.0, align 8 + %idx.1 = or disjoint i64 %idx.0, 1 + %data.1 = getelementptr inbounds i64, ptr %data, i64 %idx.1 + %l.1 = load i64, ptr %data.1, align 8 + %mul.1 = mul i64 %l.factor, %l.1 + %add.1 = add i64 %mul.1, 2 + %xor.1 = xor i64 4, %add.1 + store i64 %xor.1, ptr %data.1, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} |
