; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -S < %s -p loop-vectorize -force-vector-width=4 | FileCheck %s
;
; Loop-vectorizer tests (vector width forced to 4) for loops that have one
; early exit in addition to the latch exit, where the early-exit branch sits
; in, or directly after, conditionally executed blocks. Every loop below
; counts %iv from 0 up to 64 in steps of 1 and indexes the 64-byte globals.

@A = global [64 x i8] zeroinitializer
@B = global [64 x i8] zeroinitializer
@C = global [64 x i8] zeroinitializer

; The early exit to %loop.end is in %block.a, which is only reached when
; %branch.cond (A[iv] < 0) is true; it is taken when A[iv] == B[iv] and returns
; %iv, while the latch exit returns -1. In the expected output the exit mask is
; the select of the two vector compares, and the returned index is recomputed
; as the vector induction value plus the cttz.elts result of that mask.
define i64 @single_exit_in_conditional_block() {
; CHECK-LABEL: define i64 @single_exit_in_conditional_block() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[CMP:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[IV]], [[TMP8]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP9]], %[[LOOP_END]] ], [ -1, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %branch.cond = icmp slt i8 %l.A, 0
  br i1 %branch.cond, label %block.a, label %loop.latch

block.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %cmp = icmp eq i8 %l.A, %l.B
  br i1 %cmp, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 64
  br i1 %ec, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ %iv, %block.a ], [ -1, %loop.latch ]
  ret i64 %retval
}

; Same early-exit shape as above, but the other arm of the conditional
; (%block.b) stores 0 to B[iv], the location that %block.a loads from.
; The expected output contains only the original scalar loop, i.e. no vector
; instructions are generated for this function.
define i64 @single_exit_in_conditional_block_with_store() {
; CHECK-LABEL: define i64 @single_exit_in_conditional_block_with_store() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[BRANCH_COND:%.*]] = icmp slt i8 [[L_A]], 0
; CHECK-NEXT: br i1 [[BRANCH_COND]], label %[[BLOCK_A:.*]], label %[[BLOCK_B:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[L_B:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[L_A]], [[L_B]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: store i8 0, ptr [[GEP_B]], align 1
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 64
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV]], %[[BLOCK_A]] ], [ -1, %[[LOOP_LATCH]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %branch.cond = icmp slt i8 %l.A, 0
  br i1 %branch.cond, label %block.a, label %block.b

block.a:
  %l.B = load i8, ptr %gep.B, align 1
  %cmp = icmp eq i8 %l.A, %l.B
  br i1 %cmp, label %loop.end, label %loop.latch

block.b:
  store i8 0, ptr %gep.B
  br label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 64
  br i1 %ec, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ %iv, %block.a ], [ -1, %loop.latch ]
  ret i64 %retval
}

; Like @single_exit_in_conditional_block, except that both non-exiting paths
; first flow through an extra empty block (%merge) before reaching the latch.
; The expected vector code matches the first test apart from block naming.
define i64 @single_exit_in_conditional_block2() {
; CHECK-LABEL: define i64 @single_exit_in_conditional_block2() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[MERGE:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[CMP:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_END:.*]], label %[[MERGE]]
; CHECK: [[MERGE]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[IV]], [[TMP8]]
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP9]], %[[LOOP_END]] ], [ -1, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %branch.cond = icmp slt i8 %l.A, 0
  br i1 %branch.cond, label %block.a, label %merge

block.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %cmp = icmp eq i8 %l.A, %l.B
  br i1 %cmp, label %loop.end, label %merge

merge:
  br label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 64
  br i1 %ec, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ %iv, %block.a ], [ -1, %loop.latch ]
  ret i64 %retval
}

; Variant: exit condition defined in header but used in conditional block.
; %exit.cond (A[iv] == 1) is computed unconditionally in %loop.header, yet the
; branch on it lives in %block.a. The early exit returns the constant 10 and
; the latch exit returns %iv, which the expected output shows as the constant 63.
define i64 @exit_cond_defined_in_header() {
; CHECK-LABEL: define i64 @exit_cond_defined_in_header() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 1)
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]]
; CHECK-NEXT: [[BRANCH_COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[BRANCH_COND]], label %[[BLOCK_A:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP6]], label %[[LOOP_END:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 10, %[[BLOCK_A]] ], [ 63, %[[LOOP_END]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %branch.cond = icmp slt i8 %l.A, 0
  %exit.cond = icmp eq i8 %l.A, 1
  br i1 %branch.cond, label %block.a, label %loop.latch

block.a:
  br i1 %exit.cond, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 64
  br i1 %ec, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ 10, %block.a ], [ %iv, %loop.latch ]
  ret i64 %retval
}

; The early-exit condition is the function argument %exit.cond, so it is not
; computed inside the loop; the branch on it is in the conditional %block.a.
; The early exit returns %iv and the latch exit returns 99. In the expected
; output the argument is broadcast to <4 x i1> before the vector loop and the
; returned index is rebuilt from cttz.elts on the combined mask.
define i64 @livein_exit_cond_in_conditional(i1 %exit.cond) {
; CHECK-LABEL: define i64 @livein_exit_cond_in_conditional(
; CHECK-SAME: i1 [[EXIT_COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[EXIT_COND]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[BRANCH_COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[BRANCH_COND]], label %[[BLOCK_A:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOOP_END:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[IV]], [[TMP6]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP7]], %[[BLOCK_A]] ], [ 99, %[[LOOP_END]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %branch.cond = icmp slt i8 %l.A, 0
  br i1 %branch.cond, label %block.a, label %loop.latch

block.a:
  br i1 %exit.cond, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 64
  br i1 %ec, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ %iv, %block.a ], [ 99, %loop.latch ]
  ret i64 %retval
}

; Same loop as @livein_exit_cond_in_conditional with the exit values swapped:
; the early exit returns the constant 99 and the latch exit returns %iv.
; The expected output therefore needs no cttz.elts in the early-exit block and
; shows the latch-exit value as the constant 63.
define i64 @livein_exit_cond_in_conditional2(i1 %exit.cond) {
; CHECK-LABEL: define i64 @livein_exit_cond_in_conditional2(
; CHECK-SAME: i1 [[EXIT_COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[EXIT_COND]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[BRANCH_COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[BRANCH_COND]], label %[[BLOCK_A:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOOP_END:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 99, %[[BLOCK_A]] ], [ 63, %[[LOOP_END]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %branch.cond = icmp slt i8 %l.A, 0
  br i1 %branch.cond, label %block.a, label %loop.latch

block.a:
  br i1 %exit.cond, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 64
  br i1 %ec, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ 99, %block.a ], [ %iv, %loop.latch ]
  ret i64 %retval
}

; If/else diamond: %block.a loads B[iv] and %block.b loads C[iv]; the two
; zero-extended values meet in the phi %val in %join, and the early exit
; (taken when %val == 42) sits in %join, after the diamond has re-converged.
; The early exit returns %val and the latch exit returns 0. In the expected
; output the phi becomes a vector select and the returned value is extracted
; at the lane index given by cttz.elts on the exit mask.
define i64 @diamond_with_join_then_exit() {
; CHECK-LABEL: define i64 @diamond_with_join_then_exit() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64>
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP5]], <4 x i64> [[TMP3]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[PREDPHI]], splat (i64 42)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 false)
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[PREDPHI]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %branch.cond = icmp slt i8 %l.A, 0
  br i1 %branch.cond, label %block.a, label %block.b

block.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %val.a = zext i8 %l.B to i64
  br label %join

block.b:
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  %val.b = zext i8 %l.C to i64
  br label %join

join:
  %val = phi i64 [ %val.a, %block.a ], [ %val.b, %block.b ]
  %cmp = icmp eq i64 %val, 42
  br i1 %cmp, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 64
  br i1 %ec, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ %val, %join ], [ 0, %loop.latch ]
  ret i64 %retval
}

; Same diamond as @diamond_with_join_then_exit, but %block.b additionally
; stores 0 to C[iv] right after loading it. The expected output contains only
; the original scalar loop, i.e. no vector instructions are generated.
define i64 @diamond_with_join_then_exit_and_store() {
; CHECK-LABEL: define i64 @diamond_with_join_then_exit_and_store() {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[L_A:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[BRANCH_COND:%.*]] = icmp slt i8 [[L_A]], 0
; CHECK-NEXT: br i1 [[BRANCH_COND]], label %[[BLOCK_A:.*]], label %[[BLOCK_B:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[L_B:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[VAL_A:%.*]] = zext i8 [[L_B]] to i64
; CHECK-NEXT: br label %[[JOIN:.*]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[L_C:%.*]] = load i8, ptr [[GEP_C]], align 1
; CHECK-NEXT: store i8 0, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[VAL_B:%.*]] = zext i8 [[L_C]] to i64
; CHECK-NEXT: br label %[[JOIN]]
; CHECK: [[JOIN]]:
; CHECK-NEXT: [[VAL:%.*]] = phi i64 [ [[VAL_A]], %[[BLOCK_A]] ], [ [[VAL_B]], %[[BLOCK_B]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[VAL]], 42
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 64
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[VAL]], %[[JOIN]] ], [ 0, %[[LOOP_LATCH]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %branch.cond = icmp slt i8 %l.A, 0
  br i1 %branch.cond, label %block.a, label %block.b

block.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %val.a = zext i8 %l.B to i64
  br label %join

block.b:
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  store i8 0, ptr %gep.C
  %val.b = zext i8 %l.C to i64
  br label %join

join:
  %val = phi i64 [ %val.a, %block.a ], [ %val.b, %block.b ]
  %cmp = icmp eq i64 %val, 42
  br i1 %cmp, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 64
  br i1 %ec, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ %val, %join ], [ 0, %loop.latch ]
  ret i64 %retval
}