; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 ; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true %s | FileCheck %s %struct.i32.pair = type { i32, i32 } define void @nusw_preservation(ptr noalias %A, ptr %B) { ; CHECK-LABEL: define void @nusw_preservation( ; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[TMP0]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 -6 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nusw i32, ptr [[TMP5]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr nusw i32, ptr [[TMP6]], i64 -6 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop loop: %iv = phi i64 [ 1023, %entry ], [ %iv.next, %loop ] %x = getelementptr nusw %struct.i32.pair, ptr %A, i64 %iv, i32 0 %load.x = load i32, ptr %x, align 4 %trunc = trunc i64 %iv to i32 %add = add nsw i32 %load.x, %trunc %y = getelementptr nusw %struct.i32.pair, ptr %A, i64 %iv, i32 1 %load.y = load i32, ptr %y, align 4 %sub = sub nsw i32 %load.y, %trunc %gep.B.iv.0 = getelementptr nusw %struct.i32.pair, ptr %B, i64 %iv, i32 0 store i32 %add, ptr %gep.B.iv.0, align 4 %gep.B.iv.1 = getelementptr nusw %struct.i32.pair, ptr %B, i64 %iv, i32 1 store i32 %sub, ptr %gep.B.iv.1, align 4 %iv.next = add nsw i64 %iv, -1 %exit.cond = icmp sgt i64 %iv, 0 br i1 %exit.cond, label %loop, label %exit exit: ret void } define void @inbounds_preservation(ptr noalias %A, ptr %B) { ; CHECK-LABEL: define void @inbounds_preservation( ; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -6 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -6 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop loop: %iv = phi i64 [ 1023, %entry ], [ %iv.next, %loop ] %x = getelementptr inbounds %struct.i32.pair, ptr %A, i64 %iv, i32 0 %load.x = load i32, ptr %x, align 4 %trunc = trunc i64 %iv to i32 %add = add nsw i32 %load.x, %trunc %y = getelementptr inbounds %struct.i32.pair, ptr %A, i64 %iv, i32 1 %load.y = load i32, ptr %y, align 4 %sub = sub nsw i32 %load.y, %trunc %gep.B.iv.0 = getelementptr inbounds %struct.i32.pair, ptr %B, i64 %iv, i32 0 store i32 %add, ptr %gep.B.iv.0, align 4 %gep.B.iv.1 = getelementptr inbounds %struct.i32.pair, ptr %B, i64 %iv, i32 1 store i32 %sub, ptr %gep.B.iv.1, align 4 %iv.next = add nsw i64 %iv, -1 %exit.cond = icmp sgt i64 %iv, 0 br i1 %exit.cond, label %loop, label %exit exit: ret void } define void @nuw_drop(ptr noalias %A, ptr %B) { ; CHECK-LABEL: define void @nuw_drop( ; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw [[STRUCT_I32_PAIR:%.*]], ptr [[A]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 -6 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw [[STRUCT_I32_PAIR]], ptr [[B]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i64 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 -6 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop loop: %iv = phi i64 [ 1023, %entry ], [ %iv.next, %loop ] %x = getelementptr nuw %struct.i32.pair, ptr %A, i64 %iv, i32 0 %load.x = load i32, ptr %x, align 4 %trunc = trunc i64 %iv to i32 %add = add nsw i32 %load.x, %trunc %y = getelementptr nuw %struct.i32.pair, ptr %A, i64 %iv, i32 1 %load.y = load i32, ptr %y, align 4 %sub = sub nsw i32 %load.y, %trunc %gep.B.iv.0 = getelementptr nuw %struct.i32.pair, ptr %B, i64 %iv, i32 0 store i32 %add, ptr %gep.B.iv.0, align 4 %gep.B.iv.1 = getelementptr nuw %struct.i32.pair, ptr %B, i64 %iv, i32 1 store i32 %sub, ptr %gep.B.iv.1, align 4 %iv.next = add nsw i64 %iv, -1 %exit.cond = icmp sgt i64 %iv, 0 br i1 %exit.cond, label %loop, label %exit exit: ret void } define void @nusw_preservation_2(ptr %src, ptr noalias %dst) { ; CHECK-LABEL: define void @nusw_preservation_2( ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i8, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i8, ptr [[TMP1]], i32 -1 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STRIDED_VEC1]], [[STRIDED_VEC]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr nusw i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[TMP4]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop loop: ; preds = %loop, %entry %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %iv2 = phi i64 [ 0, %entry ], [ %iv2.next, %loop ] %or.1 = or disjoint i64 %iv2, 1 %gep.src.or.1 = getelementptr nusw i8, ptr %src, i64 %or.1 %load.src.1 = load i8, ptr %gep.src.or.1, align 1 %gep.src.iv2 = getelementptr nusw i8, ptr %src, i64 %iv2 %load.src.2 = load i8, ptr %gep.src.iv2, align 1 %add = add i8 %load.src.1, %load.src.2 %gep.dst.iv = getelementptr nusw i8, ptr %dst, i64 %iv store i8 %add, ptr %gep.dst.iv, align 1 %iv2.next = add i64 %iv2, 2 %iv.next = add i64 %iv, 1 %exit.cond = icmp eq i64 %iv.next, 100 br i1 %exit.cond, label %exit, label %loop exit: ret void } define void @inbounds_preservation_2(ptr %src, ptr noalias %dst) { ; CHECK-LABEL: define void @inbounds_preservation_2( ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -1 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STRIDED_VEC1]], [[STRIDED_VEC]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[TMP4]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop loop: ; preds = %loop, %entry %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %iv2 = phi i64 [ 0, %entry ], [ %iv2.next, %loop ] %or.1 = or disjoint i64 %iv2, 1 %gep.src.or.1 = getelementptr inbounds i8, ptr %src, i64 %or.1 %load.src.1 = load i8, ptr %gep.src.or.1, align 1 %gep.src.iv2 = getelementptr inbounds i8, ptr %src, i64 %iv2 %load.src.2 = load i8, ptr %gep.src.iv2, align 1 %add = add i8 %load.src.1, %load.src.2 %gep.dst.iv = getelementptr inbounds i8, ptr %dst, i64 %iv store i8 %add, ptr %gep.dst.iv, align 1 %iv2.next = add i64 %iv2, 2 %iv.next = add i64 %iv, 1 %exit.cond = icmp eq i64 %iv.next, 100 br i1 %exit.cond, label %exit, label %loop exit: ret void } define void @nuw_drop_2(ptr %src, ptr noalias %dst) { ; CHECK-LABEL: define void @nuw_drop_2( ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i8, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 -1 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i8> [[WIDE_VEC]], <8 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STRIDED_VEC1]], [[STRIDED_VEC]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr nuw i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i8> [[TMP3]], ptr [[TMP4]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[EXIT:label %.*]] ; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop loop: ; preds = %loop, %entry %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %iv2 = phi i64 [ 0, %entry ], [ %iv2.next, %loop ] %or.1 = or disjoint i64 %iv2, 1 %gep.src.or.1 = getelementptr nuw i8, ptr %src, i64 %or.1 %load.src.1 = load i8, ptr %gep.src.or.1, align 1 %gep.src.iv2 = getelementptr nuw i8, ptr %src, i64 %iv2 %load.src.2 = load i8, ptr %gep.src.iv2, align 1 %add = add i8 %load.src.1, %load.src.2 %gep.dst.iv = getelementptr nuw i8, ptr %dst, i64 %iv store i8 %add, ptr %gep.dst.iv, align 1 %iv2.next = add i64 %iv2, 2 %iv.next = add i64 %iv, 1 %exit.cond = icmp eq i64 %iv.next, 100 br i1 %exit.cond, label %exit, label %loop exit: ret void }