; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s

; Test loads that are only used as live-outs in early exit loops.

; Test load in header block only used as live-out (dereferenceable).
;
; Both pointer arguments are dereferenceable(1024). The loop compares A[iv]
; with B[iv] and leaves early through loop.end as soon as the bytes differ,
; returning the byte loaded from A (%ld1); if the latch exit is taken instead
; (iv.next == 67) it returns 0.
; NOTE(review): %ld1 also feeds the exit compare %cmp3, so it is not strictly
; used as a live-out only.
;
; The expected output below has a vector body with <4 x i8> wide loads of A and
; B that runs up to index 64, followed by the scalar loop resuming at 64. The
; early-exit block extracts the returned byte from the wide load of A at the
; first lane whose compare was true.
define i8 @header_load_live_out_deref(ptr dereferenceable(1024) %A, ptr dereferenceable(1024) %B) {
; CHECK-LABEL: define i8 @header_load_live_out_deref(
; CHECK-SAME: ptr dereferenceable(1024) [[A:%.*]], ptr dereferenceable(1024) [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 64, %[[LOOP_END]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_LATCH]], label %[[LOOP_END1]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 67
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[LOOP_END1]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ [[LD1]], %[[LOOP_HEADER]] ], [ 0, %[[LOOP_LATCH]] ], [ [[TMP6]], %[[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i8 [[RETVAL]]
;
entry:
  br label %loop.header

; Header: load one byte from each array and take the early exit on mismatch.
loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
  %ld1 = load i8, ptr %gep.A, align 1
  %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
  %ld2 = load i8, ptr %gep.B, align 1
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.latch, label %loop.end

; Latch: counted exit once iv.next reaches 67.
loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 67
  br i1 %ec, label %loop.header, label %loop.end

; Shared exit: %ld1 when leaving from the header, 0 when leaving from the latch.
loop.end:
  %retval = phi i8 [ %ld1, %loop.header ], [ 0, %loop.latch ]
  ret i8 %retval
}

; Test load in latch block only used as live-out (dereferenceable).
define i8 @latch_load_live_out_deref(ptr dereferenceable(1024) %A, ptr dereferenceable(1024) %B) {
; CHECK-LABEL: define i8 @latch_load_live_out_deref(
; CHECK-SAME: ptr dereferenceable(1024) [[A:%.*]], ptr dereferenceable(1024) [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 64
; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 64, %[[LOOP_END]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_LATCH]], label %[[LOOP_END1]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[GEP_A_NEXT:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_A_NEXT]], align 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 67
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[LOOP_END1]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ [[LD1]], %[[LOOP_HEADER]] ], [ [[LD3]], %[[LOOP_LATCH]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i8 [[RETVAL]]
;
; Both pointer arguments are dereferenceable(1024). The loop leaves early from
; the header when A[iv] != B[iv], returning %ld1. The latch additionally loads
; A[iv.next] as %ld3, whose only use is the exit phi for the latch exit
; (iv.next == 67).
; In the expected output above the vector body contains only the wide loads of
; A and B at the current index; the load feeding %ld3 appears in the scalar
; loop only, which resumes at index 64.
entry:
  br label %loop.header

; Header: load one byte from each array and take the early exit on mismatch.
loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
  %ld1 = load i8, ptr %gep.A, align 1
  %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
  %ld2 = load i8, ptr %gep.B, align 1
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.latch, label %loop.end

; Latch: load the next element of A (live-out only), then the counted exit.
loop.latch:
  %iv.next = add i64 %iv, 1
  %gep.A.next = getelementptr inbounds i8, ptr %A, i64 %iv.next
  %ld3 = load i8, ptr %gep.A.next, align 1
  %ec = icmp ne i64 %iv.next, 67
  br i1 %ec, label %loop.header, label %loop.end

; Shared exit: %ld1 when leaving from the header, %ld3 when leaving from the latch.
loop.end:
  %retval = phi i8 [ %ld1, %loop.header ], [ %ld3, %loop.latch ]
  ret i8 %retval
}

; Test load in header block only used as live-out (unknown ptr, needs speculative load).
define i8 @header_load_live_out_unknown(ptr %A, ptr %B) {
; CHECK-LABEL: define i8 @header_load_live_out_unknown(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[SCALAR_PH:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
; CHECK: [[LOOP_INC]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ [[LD1]], %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
; CHECK-NEXT: ret i8 [[RETVAL]]
;
; %A and %B carry no dereferenceable attribute here. The loop leaves early from
; the header when A[iv] != B[iv], returning %ld1, and returns 0 on the latch
; exit (iv.next == 67).
; NOTE(review): %ld1 also feeds the exit compare %cmp3, so it is not strictly
; used as a live-out only.
; The expected output above contains only the scalar loop blocks and no vector
; body, i.e. the loop is expected to stay unvectorized.
entry:
  br label %loop.header

; Header: load one byte from each array and take the early exit on mismatch.
loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
  %ld1 = load i8, ptr %gep.A, align 1
  %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
  %ld2 = load i8, ptr %gep.B, align 1
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.latch, label %loop.end

; Latch: counted exit once iv.next reaches 67.
loop.latch:
  %iv.next = add i64 %iv, 1
  %ec = icmp ne i64 %iv.next, 67
  br i1 %ec, label %loop.header, label %loop.end

; Shared exit: %ld1 when leaving from the header, 0 when leaving from the latch.
loop.end:
  %retval = phi i8 [ %ld1, %loop.header ], [ 0, %loop.latch ]
  ret i8 %retval
}

; Test load in latch block only used as live-out (unknown ptr, needs speculative load).
define i8 @latch_load_live_out_unknown(ptr %A, ptr %B) {
; CHECK-LABEL: define i8 @latch_load_live_out_unknown(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[SCALAR_PH:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
; CHECK: [[LOOP_INC]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX_NEXT]]
; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ [[LD1]], %[[LOOP]] ], [ [[LD3]], %[[LOOP_INC]] ]
; CHECK-NEXT: ret i8 [[RETVAL]]
;
; %A and %B carry no dereferenceable attribute here. The loop leaves early from
; the header when A[iv] != B[iv], returning %ld1. The latch additionally loads
; A[iv.next] as %ld3, whose only use is the exit phi for the latch exit
; (iv.next == 100).
; NOTE(review): the latch bound is 100 here, while the other functions in this
; file use 67; confirm this is intentional.
; The expected output above contains only the scalar loop blocks and no vector
; body, i.e. the loop is expected to stay unvectorized.
entry:
  br label %loop.header

; Header: load one byte from each array and take the early exit on mismatch.
loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
  %ld1 = load i8, ptr %gep.A, align 1
  %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
  %ld2 = load i8, ptr %gep.B, align 1
  %cmp3 = icmp eq i8 %ld1, %ld2
  br i1 %cmp3, label %loop.latch, label %loop.end

; Latch: load the next element of A (live-out only), then the counted exit.
loop.latch:
  %iv.next = add i64 %iv, 1
  %gep.A.next = getelementptr inbounds i8, ptr %A, i64 %iv.next
  %ld3 = load i8, ptr %gep.A.next, align 1
  %ec = icmp ne i64 %iv.next, 100
  br i1 %ec, label %loop.header, label %loop.end

; Shared exit: %ld1 when leaving from the header, %ld3 when leaving from the latch.
loop.end:
  %retval = phi i8 [ %ld1, %loop.header ], [ %ld3, %loop.latch ]
  ret i8 %retval
}