diff options
Diffstat (limited to 'llvm/test/Transforms')
244 files changed, 2559 insertions, 11815 deletions
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/pr161367.ll b/llvm/test/Transforms/CorrelatedValuePropagation/pr161367.ll new file mode 100644 index 0000000..346eaea --- /dev/null +++ b/llvm/test/Transforms/CorrelatedValuePropagation/pr161367.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s + +; Make sure that we apply trunc to the edge value of %x. +@g = external global i8 + +define i16 @pr161367(i64 %x) { +; CHECK-LABEL: define i16 @pr161367( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[X]] to i16 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[X]], sub (i64 0, i64 ptrtoint (ptr @g to i64)) +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[ELSE:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RET:%.*]] = phi i16 [ trunc (i64 sub (i64 0, i64 ptrtoint (ptr @g to i64)) to i16), %[[ENTRY]] ], [ 0, %[[ELSE]] ] +; CHECK-NEXT: ret i16 [[RET]] +; +entry: + %trunc = trunc i64 %x to i16 + %exitcond = icmp eq i64 %x, sub (i64 0, i64 ptrtoint (ptr @g to i64)) + br i1 %exitcond, label %exit, label %else + +else: + br label %exit + +exit: + %ret = phi i16 [ %trunc, %entry ], [ 0, %else ] + ret i16 %ret +} diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll index cba1ba8..ad05684 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll @@ -304,32 +304,43 @@ end: define void @pr106083_invalidBBarg_fold(i1 %cmp1, i1 %cmp2, i1 %not, ptr %d) { ; CHECK-LABEL: @pr106083_invalidBBarg_fold( ; CHECK-NEXT: bb: -; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[SEL_SI_UNFOLD_FALSE:%.*]] -; CHECK: sel.si.unfold.false: -; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 1, [[BB:%.*]] ] -; CHECK-NEXT: br label [[BB1]] +; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: BB1: -; CHECK-NEXT: [[I:%.*]] = phi i16 [ 0, [[BB1_BACKEDGE:%.*]] ], [ 0, [[BB]] ], [ 1, [[BB7:%.*]] ], [ 0, [[SEL_SI_UNFOLD_FALSE]] ], [ 1, [[BB7_JT0:%.*]] ] -; CHECK-NEXT: [[SEL_SI_UNFOLD_PHI:%.*]] = phi i32 [ [[SEL_SI_UNFOLD_PHI]], [[BB1_BACKEDGE]] ], [ [[SEL_SI_UNFOLD_PHI]], [[BB7]] ], [ 0, [[BB]] ], [ [[DOTSI_UNFOLD_PHI1]], [[SEL_SI_UNFOLD_FALSE]] ], [ [[SEL_SI_UNFOLD_PHI]], [[BB7_JT0]] ] +; CHECK-NEXT: [[I:%.*]] = phi i16 [ 0, [[BB1_BACKEDGE:%.*]] ], [ 0, [[BB:%.*]] ], [ 1, [[BB9:%.*]] ], [ 1, [[BB7_JT0:%.*]] ] ; CHECK-NEXT: br i1 [[NOT:%.*]], label [[BB7_JT0]], label [[BB2:%.*]] ; CHECK: BB2: ; CHECK-NEXT: store i16 0, ptr [[D:%.*]], align 2 -; CHECK-NEXT: br i1 [[CMP2:%.*]], label [[BB7]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK-NEXT: br i1 [[CMP2:%.*]], label [[BB7:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0:%.*]] ; CHECK: spec.select.si.unfold.false: -; CHECK-NEXT: br label [[BB7]] +; CHECK-NEXT: br label [[BB9]] ; CHECK: spec.select.si.unfold.false.jt0: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[BB2]] ] ; CHECK-NEXT: br label [[BB7_JT0]] +; CHECK: sel.si.unfold.true: +; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB9]], label [[SEL_SI_UNFOLD_FALSE_JT1:%.*]] +; CHECK: sel.si.unfold.true.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 0, [[BB2]] ] +; CHECK-NEXT: br i1 [[CMP1]], label [[BB7_JT0]], label [[SEL_SI_UNFOLD_FALSE:%.*]] +; CHECK: sel.si.unfold.false: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI2:%.*]] = phi i32 [ 1, [[BB7]] ] +; CHECK-NEXT: br label [[BB9]] +; CHECK: sel.si.unfold.false.jt1: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI2_JT1:%.*]] = phi i32 [ 1, [[SEL_SI_UNFOLD_TRUE:%.*]] ] +; CHECK-NEXT: br label [[BB7_JT1:%.*]] ; CHECK: BB7: -; CHECK-NEXT: [[D_PROMOTED4:%.*]] = phi i16 [ 1, [[BB2]] ], [ 1, [[SPEC_SELECT_SI_UNFOLD_FALSE:%.*]] ] -; CHECK-NEXT: [[_3:%.*]] = phi i32 [ [[SEL_SI_UNFOLD_PHI]], [[BB2]] ], [ poison, [[SPEC_SELECT_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: [[D_PROMOTED4:%.*]] = phi i16 [ 1, [[SPEC_SELECT_SI_UNFOLD_FALSE:%.*]] ], [ 1, [[SEL_SI_UNFOLD_TRUE]] ], [ 1, [[SEL_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: [[_3:%.*]] = phi i32 [ poison, [[SPEC_SELECT_SI_UNFOLD_FALSE]] ], [ poison, [[SEL_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI2]], [[SEL_SI_UNFOLD_FALSE]] ] ; CHECK-NEXT: switch i32 [[_3]], label [[BB1_BACKEDGE]] [ ; CHECK-NEXT: i32 0, label [[BB1]] ; CHECK-NEXT: i32 1, label [[BB8:%.*]] ; CHECK-NEXT: ] +; CHECK: BB7.jt1: +; CHECK-NEXT: [[D_PROMOTED4_JT1:%.*]] = phi i16 [ 1, [[SEL_SI_UNFOLD_FALSE_JT1]] ] +; CHECK-NEXT: [[_3_JT1:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI2_JT1]], [[SEL_SI_UNFOLD_FALSE_JT1]] ] +; CHECK-NEXT: br label [[BB8]] ; CHECK: BB7.jt0: -; CHECK-NEXT: [[D_PROMOTED4_JT0:%.*]] = phi i16 [ 0, [[BB1]] ], [ 1, [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0]] ] -; CHECK-NEXT: [[_3_JT0:%.*]] = phi i32 [ 0, [[BB1]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: [[D_PROMOTED4_JT0:%.*]] = phi i16 [ 0, [[BB1]] ], [ 1, [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0]] ], [ 1, [[BB7]] ] +; CHECK-NEXT: [[_3_JT0:%.*]] = phi i32 [ 0, [[BB1]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SPEC_SELECT_SI_UNFOLD_FALSE_JT0]] ], [ [[DOTSI_UNFOLD_PHI1]], [[BB7]] ] ; CHECK-NEXT: br label [[BB1]] ; CHECK: BB1.backedge: ; CHECK-NEXT: br label [[BB1]] @@ -367,30 +378,40 @@ BB8: ; preds = %BB7 define void @pr106083_select_dead_uses(i1 %cmp1, i1 %not, ptr %p) { ; CHECK-LABEL: @pr106083_select_dead_uses( ; CHECK-NEXT: bb: -; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[DOTLOOPEXIT6:%.*]], label [[SPEC_SELECT_SI_UNFOLD_FALSE:%.*]] -; CHECK: spec.select.si.unfold.false: -; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 1, [[BB:%.*]] ] -; CHECK-NEXT: br label [[DOTLOOPEXIT6]] +; CHECK-NEXT: br label [[DOTLOOPEXIT6:%.*]] ; CHECK: .loopexit6: -; CHECK-NEXT: [[SPEC_SELECT_SI_UNFOLD_PHI:%.*]] = phi i32 [ [[SPEC_SELECT_SI_UNFOLD_PHI]], [[SELECT_UNFOLD:%.*]] ], [ 0, [[BB]] ], [ [[DOTSI_UNFOLD_PHI1]], [[SPEC_SELECT_SI_UNFOLD_FALSE]] ] ; CHECK-NEXT: br i1 [[NOT:%.*]], label [[SELECT_UNFOLD_JT0:%.*]], label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P:%.*]], align 4 ; CHECK-NEXT: [[NOT2:%.*]] = icmp eq i32 0, 0 -; CHECK-NEXT: br i1 [[NOT2]], label [[SELECT_UNFOLD]], label [[SPEC_SELECT7_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK-NEXT: br i1 [[NOT2]], label [[SELECT_UNFOLD:%.*]], label [[SPEC_SELECT7_SI_UNFOLD_FALSE_JT0:%.*]] ; CHECK: spec.select7.si.unfold.false: -; CHECK-NEXT: br label [[SELECT_UNFOLD]] +; CHECK-NEXT: br label [[SELECT_UNFOLD1:%.*]] ; CHECK: spec.select7.si.unfold.false.jt0: ; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[BB1]] ] ; CHECK-NEXT: br label [[SELECT_UNFOLD_JT0]] +; CHECK: spec.select.si.unfold.true: +; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[SELECT_UNFOLD1]], label [[SPEC_SELECT_SI_UNFOLD_FALSE_JT1:%.*]] +; CHECK: spec.select.si.unfold.true.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 0, [[BB1]] ] +; CHECK-NEXT: br i1 [[CMP1]], label [[SELECT_UNFOLD_JT0]], label [[SPEC_SELECT_SI_UNFOLD_FALSE:%.*]] +; CHECK: spec.select.si.unfold.false: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI2:%.*]] = phi i32 [ 1, [[SELECT_UNFOLD]] ] +; CHECK-NEXT: br label [[SELECT_UNFOLD1]] +; CHECK: spec.select.si.unfold.false.jt1: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI2_JT1:%.*]] = phi i32 [ 1, [[SPEC_SELECT_SI_UNFOLD_TRUE:%.*]] ] +; CHECK-NEXT: br label [[SELECT_UNFOLD_JT1:%.*]] ; CHECK: select.unfold: -; CHECK-NEXT: [[_2:%.*]] = phi i32 [ [[SPEC_SELECT_SI_UNFOLD_PHI]], [[BB1]] ], [ poison, [[SPEC_SELECT7_SI_UNFOLD_FALSE:%.*]] ] +; CHECK-NEXT: [[_2:%.*]] = phi i32 [ poison, [[SPEC_SELECT7_SI_UNFOLD_FALSE:%.*]] ], [ poison, [[SPEC_SELECT_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI2]], [[SPEC_SELECT_SI_UNFOLD_FALSE]] ] ; CHECK-NEXT: switch i32 [[_2]], label [[BB2:%.*]] [ ; CHECK-NEXT: i32 0, label [[DOTPREHEADER_PREHEADER:%.*]] ; CHECK-NEXT: i32 1, label [[DOTLOOPEXIT6]] ; CHECK-NEXT: ] +; CHECK: select.unfold.jt1: +; CHECK-NEXT: [[_2_JT1:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI2_JT1]], [[SPEC_SELECT_SI_UNFOLD_FALSE_JT1]] ] +; CHECK-NEXT: br label [[DOTLOOPEXIT6]] ; CHECK: select.unfold.jt0: -; CHECK-NEXT: [[_2_JT0:%.*]] = phi i32 [ 0, [[DOTLOOPEXIT6]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SPEC_SELECT7_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: [[_2_JT0:%.*]] = phi i32 [ 0, [[DOTLOOPEXIT6]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SPEC_SELECT7_SI_UNFOLD_FALSE_JT0]] ], [ [[DOTSI_UNFOLD_PHI1]], [[SELECT_UNFOLD]] ] ; CHECK-NEXT: br label [[DOTPREHEADER_PREHEADER]] ; CHECK: .preheader.preheader: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll index 93872c3..663f459 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll @@ -463,3 +463,87 @@ unreachable: sw.bb: ; preds = %if.end br label %while.cond } + +define i16 @pr160250() { +; CHECK-LABEL: @pr160250( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND48:%.*]] +; CHECK: for.cond48: +; CHECK-NEXT: br i1 false, label [[CLEANUP87_JT0:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br i1 false, label [[DOT6_SI_UNFOLD_TRUE:%.*]], label [[DOT5_SI_UNFOLD_TRUE:%.*]] +; CHECK: .5.si.unfold.true: +; CHECK-NEXT: br i1 false, label [[SPEC_SELECT1_SI_UNFOLD_TRUE1:%.*]], label [[DOT5_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK: .5.si.unfold.true.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI1:%.*]] = phi i32 [ 0, [[IF_ELSE]] ] +; CHECK-NEXT: br i1 false, label [[SPEC_SELECT1_SI_UNFOLD_TRUE:%.*]], label [[DOT5_SI_UNFOLD_FALSE:%.*]] +; CHECK: .5.si.unfold.false: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI2:%.*]] = phi i32 [ 0, [[DOT5_SI_UNFOLD_TRUE]] ] +; CHECK-NEXT: br label [[SPEC_SELECT1_SI_UNFOLD_TRUE1]] +; CHECK: .5.si.unfold.false.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI2_JT0:%.*]] = phi i32 [ 0, [[DOT5_SI_UNFOLD_TRUE1:%.*]] ] +; CHECK-NEXT: br label [[SPEC_SELECT1_SI_UNFOLD_TRUE]] +; CHECK: spec.select1.si.unfold.true: +; CHECK-NEXT: [[DOT5_SI_UNFOLD_PHI:%.*]] = phi i32 [ poison, [[DOT5_SI_UNFOLD_TRUE1]] ], [ [[DOTSI_UNFOLD_PHI2]], [[DOT5_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: br i1 false, label [[SPEC_SELECT_SI_UNFOLD_FALSE1:%.*]], label [[SPEC_SELECT1_SI_UNFOLD_FALSE_JT1:%.*]] +; CHECK: spec.select1.si.unfold.true.jt0: +; CHECK-NEXT: [[DOT5_SI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ [[DOTSI_UNFOLD_PHI1]], [[DOT5_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI2_JT0]], [[DOT5_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br i1 false, label [[SPEC_SELECT_SI_UNFOLD_FALSE:%.*]], label [[SPEC_SELECT1_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK: spec.select1.si.unfold.false: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI:%.*]] = phi i32 [ 0, [[SPEC_SELECT1_SI_UNFOLD_TRUE]] ] +; CHECK-NEXT: br label [[SPEC_SELECT_SI_UNFOLD_FALSE1]] +; CHECK: spec.select1.si.unfold.false.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[SPEC_SELECT1_SI_UNFOLD_TRUE1]] ] +; CHECK-NEXT: br label [[SPEC_SELECT_SI_UNFOLD_FALSE]] +; CHECK: spec.select.si.unfold.false: +; CHECK-NEXT: [[SPEC_SELECT1_SI_UNFOLD_PHI:%.*]] = phi i32 [ [[DOT5_SI_UNFOLD_PHI]], [[SPEC_SELECT1_SI_UNFOLD_TRUE1]] ], [ [[DOTSI_UNFOLD_PHI]], [[SPEC_SELECT1_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br label [[CLEANUP87:%.*]] +; CHECK: spec.select.si.unfold.false.jt0: +; CHECK-NEXT: [[SPEC_SELECT1_SI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ [[DOT5_SI_UNFOLD_PHI_JT0]], [[SPEC_SELECT1_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SPEC_SELECT1_SI_UNFOLD_FALSE_JT1]] ] +; CHECK-NEXT: br label [[CLEANUP87_JT0]] +; CHECK: .6.si.unfold.true: +; CHECK-NEXT: br i1 false, label [[CLEANUP87]], label [[DOT6_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK: .6.si.unfold.true.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI3:%.*]] = phi i32 [ 0, [[IF_ELSE]] ] +; CHECK-NEXT: br i1 false, label [[CLEANUP87_JT0]], label [[DOT6_SI_UNFOLD_FALSE:%.*]] +; CHECK: .6.si.unfold.false: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI4:%.*]] = phi i32 [ 0, [[DOT6_SI_UNFOLD_TRUE]] ] +; CHECK-NEXT: br label [[CLEANUP87]] +; CHECK: .6.si.unfold.false.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI4_JT0:%.*]] = phi i32 [ 0, [[DOT6_SI_UNFOLD_TRUE1:%.*]] ] +; CHECK-NEXT: br label [[CLEANUP87_JT0]] +; CHECK: cleanup87: +; CHECK-NEXT: [[CLEANUP_DEST_SLOT_3:%.*]] = phi i32 [ [[SPEC_SELECT1_SI_UNFOLD_PHI]], [[SPEC_SELECT_SI_UNFOLD_FALSE1]] ], [ poison, [[DOT6_SI_UNFOLD_TRUE1]] ], [ [[DOTSI_UNFOLD_PHI4]], [[DOT6_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: switch i32 [[CLEANUP_DEST_SLOT_3]], label [[FOR_COND48_BACKEDGE:%.*]] [ +; CHECK-NEXT: i32 0, label [[FOR_COND48_BACKEDGE]] +; CHECK-NEXT: i32 1, label [[FOR_COND48_BACKEDGE]] +; CHECK-NEXT: ] +; CHECK: cleanup87.jt0: +; CHECK-NEXT: [[CLEANUP_DEST_SLOT_3_JT0:%.*]] = phi i32 [ 0, [[FOR_COND48]] ], [ [[SPEC_SELECT1_SI_UNFOLD_PHI_JT0]], [[SPEC_SELECT_SI_UNFOLD_FALSE]] ], [ [[DOTSI_UNFOLD_PHI3]], [[DOT6_SI_UNFOLD_TRUE]] ], [ [[DOTSI_UNFOLD_PHI4_JT0]], [[DOT6_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br label [[FOR_COND48_BACKEDGE]] +; CHECK: for.cond48.backedge: +; CHECK-NEXT: br label [[FOR_COND48]] +; +entry: + %.5 = select i1 false, i32 0, i32 0 + %.6 = select i1 false, i32 0, i32 0 + br label %for.cond48 + +for.cond48: ; preds = %for.cond48.backedge, %entry + br i1 false, label %cleanup87, label %if.else + +if.else: ; preds = %for.cond48 + %spec.select1 = select i1 false, i32 %.5, i32 0 + %spec.select = select i1 false, i32 %.6, i32 %spec.select1 + br label %cleanup87 + +cleanup87: ; preds = %if.else, %for.cond48 + %cleanup.dest.slot.3 = phi i32 [ 0, %for.cond48 ], [ %spec.select, %if.else ] + switch i32 %cleanup.dest.slot.3, label %for.cond48.backedge [ + i32 0, label %for.cond48.backedge + i32 1, label %for.cond48.backedge + ] + +for.cond48.backedge: ; preds = %cleanup87, %cleanup87, %cleanup87 + br label %for.cond48 +} diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index 60a4214..8113ba65 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -1398,5 +1398,73 @@ define void @assume_nonnull(ptr %p) { ret void } +define void @captures_metadata_address_is_null(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_address_is_null +; FNATTRS-SAME: (ptr captures(address_is_null) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META0:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_address_is_null +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META0:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"address_is_null"} + ret void +} + +define void @captures_metadata_address(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_address +; FNATTRS-SAME: (ptr captures(address) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META1:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_address +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META1:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"address"} + ret void +} + +define void @captures_metadata_address_read_provenance(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_address_read_provenance +; FNATTRS-SAME: (ptr captures(address, read_provenance) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META2:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_address_read_provenance +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META2:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"} + ret void +} + +define void @captures_metadata_provenance(ptr %x, ptr %y) { +; FNATTRS: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; FNATTRS-LABEL: define void @captures_metadata_provenance +; FNATTRS-SAME: (ptr captures(provenance) [[X:%.*]], ptr writeonly captures(none) initializes((0, 8)) [[Y:%.*]]) #[[ATTR17]] { +; FNATTRS-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META3:![0-9]+]] +; FNATTRS-NEXT: ret void +; +; ATTRIBUTOR: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) +; ATTRIBUTOR-LABEL: define void @captures_metadata_provenance +; ATTRIBUTOR-SAME: (ptr nofree writeonly [[X:%.*]], ptr nofree nonnull writeonly captures(none) [[Y:%.*]]) #[[ATTR13]] { +; ATTRIBUTOR-NEXT: store ptr [[X]], ptr [[Y]], align 8, !captures [[META3:![0-9]+]] +; ATTRIBUTOR-NEXT: ret void +; + store ptr %x, ptr %y, !captures !{!"provenance"} + ret void +} + declare ptr @llvm.launder.invariant.group.p0(ptr) declare ptr @llvm.strip.invariant.group.p0(ptr) diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll index 854357e..edbd602 100644 --- a/llvm/test/Transforms/GlobalOpt/fastcc.ll +++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll @@ -1,16 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=globalopt -S | FileCheck %s declare token @llvm.call.preallocated.setup(i32) declare ptr @llvm.call.preallocated.arg(token, i32) define internal i32 @f(ptr %m) { -; CHECK-LABEL: define internal fastcc i32 @f +; CHECK-LABEL: define internal fastcc i32 @f( +; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } define internal x86_thiscallcc i32 @g(ptr %m) { -; CHECK-LABEL: define internal fastcc i32 @g +; CHECK-LABEL: define internal fastcc i32 @g( +; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } @@ -18,41 +27,80 @@ define internal x86_thiscallcc i32 @g(ptr %m) { ; Leave this one alone, because the user went out of their way to request this ; convention. define internal coldcc i32 @h(ptr %m) { -; CHECK-LABEL: define internal coldcc i32 @h +; CHECK-LABEL: define internal coldcc i32 @h( +; CHECK-SAME: ptr [[M:%.*]]) unnamed_addr { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } define internal i32 @j(ptr %m) { -; CHECK-LABEL: define internal i32 @j +; CHECK-LABEL: define internal i32 @j( +; CHECK-SAME: ptr [[M:%.*]]) { +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[M]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; %v = load i32, ptr %m ret i32 %v } define internal i32 @inalloca(ptr inalloca(i32) %p) { -; CHECK-LABEL: define internal fastcc i32 @inalloca(ptr %p) +; CHECK-LABEL: define internal fastcc i32 @inalloca( +; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RV]] +; %rv = load i32, ptr %p ret i32 %rv } define i32 @inalloca2_caller(ptr inalloca(i32) %p) { +; CHECK-LABEL: define i32 @inalloca2_caller( +; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = musttail call i32 @inalloca2(ptr inalloca(i32) [[P]]) +; CHECK-NEXT: ret i32 [[RV]] +; %rv = musttail call i32 @inalloca2(ptr inalloca(i32) %p) ret i32 %rv } define internal i32 @inalloca2(ptr inalloca(i32) %p) { ; Because of the musttail caller, this inalloca cannot be dropped. -; CHECK-LABEL: define internal i32 @inalloca2(ptr inalloca(i32) %p) +; CHECK-LABEL: define internal i32 @inalloca2( +; CHECK-SAME: ptr inalloca(i32) [[P:%.*]]) unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RV]] +; %rv = load i32, ptr %p ret i32 %rv } define internal i32 @preallocated(ptr preallocated(i32) %p) { -; CHECK-LABEL: define internal fastcc i32 @preallocated(ptr %p) +; CHECK-LABEL: define internal fastcc i32 @preallocated( +; CHECK-SAME: ptr [[P:%.*]]) unnamed_addr { +; CHECK-NEXT: [[RV:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 [[RV]] +; %rv = load i32, ptr %p ret i32 %rv } define void @call_things() { +; CHECK-LABEL: define void @call_things() local_unnamed_addr { +; CHECK-NEXT: [[M:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = call fastcc i32 @f(ptr [[M]]) +; CHECK-NEXT: [[TMP2:%.*]] = call fastcc i32 @g(ptr [[M]]) +; CHECK-NEXT: [[TMP3:%.*]] = call coldcc i32 @h(ptr [[M]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @j(ptr [[M]]) +; CHECK-NEXT: [[ARGS:%.*]] = alloca inalloca i32, align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call fastcc i32 @inalloca(ptr [[ARGS]]) +; CHECK-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave.p0() +; CHECK-NEXT: [[PAARG:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = call fastcc i32 @preallocated(ptr [[PAARG]]) +; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP6]]) +; CHECK-NEXT: ret void +; %m = alloca i32 call i32 @f(ptr %m) call x86_thiscallcc i32 @g(ptr %m) @@ -65,15 +113,25 @@ define void @call_things() { call i32 @preallocated(ptr preallocated(i32) %N) ["preallocated"(token %c)] ret void } -; CHECK-LABEL: define void @call_things() -; CHECK: call fastcc i32 @f -; CHECK: call fastcc i32 @g -; CHECK: call coldcc i32 @h -; CHECK: call i32 @j -; CHECK: call fastcc i32 @inalloca(ptr %args) -; CHECK-NOT: llvm.call.preallocated -; CHECK: call fastcc i32 @preallocated(ptr %paarg) @llvm.used = appending global [1 x ptr] [ - ptr @j + ptr @j ], section "llvm.metadata" + +define internal i32 @assume_fastcc() { +; CHECK-LABEL: define internal fastcc i32 @assume_fastcc() { +; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false) +; CHECK-NEXT: ret i32 [[OBJSIZE]] +; + %objsize = call i32 @llvm.objectsize.i32.p0(ptr @assume_fastcc, i1 false, i1 false, i1 false) + ret i32 %objsize +} + +define internal i32 @constexpr_self_user() addrspace(1) { +; CHECK-LABEL: define internal fastcc i32 @constexpr_self_user() addrspace(1) { +; CHECK-NEXT: [[OBJSIZE:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false) +; CHECK-NEXT: ret i32 [[OBJSIZE]] +; + %objsize = call i32 @llvm.objectsize.i32.p0(ptr addrspacecast (ptr addrspace(1) @constexpr_self_user to ptr), i1 false, i1 false, i1 false) + ret i32 %objsize +} diff --git a/llvm/test/Transforms/InstCombine/freeze-phi.ll b/llvm/test/Transforms/InstCombine/freeze-phi.ll index cdc9a5e..62bb9dc3 100644 --- a/llvm/test/Transforms/InstCombine/freeze-phi.ll +++ b/llvm/test/Transforms/InstCombine/freeze-phi.ll @@ -212,3 +212,31 @@ D: %y.fr = freeze i32 %y ret i32 %y.fr } + +; Make sure that fmf in phi node is dropped when freeze get folded. + +define float @pr161524(float noundef %arg) { +; CHECK-LABEL: @pr161524( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[ARG:%.*]], i32 144) +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_EXIT:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[FADD:%.*]] = fadd float [[ARG]], 1.000000e+00 +; CHECK-NEXT: br label [[IF_EXIT]] +; CHECK: if.exit: +; CHECK-NEXT: [[RET:%.*]] = phi float [ [[FADD]], [[IF_THEN]] ], [ [[ARG]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret float [[RET]] +; +entry: + %cond = tail call i1 @llvm.is.fpclass.f32(float %arg, i32 144) + br i1 %cond, label %if.then, label %if.exit + +if.then: + %fadd = fadd float %arg, 1.0 + br label %if.exit + +if.exit: + %ret = phi ninf float [ %fadd, %if.then ], [ %arg, %entry ] + %ret.fr = freeze float %ret + ret float %ret.fr +} diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll index af5cb0c..ac7d65c 100644 --- a/llvm/test/Transforms/InstCombine/freeze.ll +++ b/llvm/test/Transforms/InstCombine/freeze.ll @@ -1464,6 +1464,27 @@ define ptr @freeze_ptrmask_nonnull(ptr %p, i64 noundef %m) { ret ptr %fr } +define i64 @pr161492_1(i1 %cond) { +; CHECK-LABEL: define i64 @pr161492_1( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT: ret i64 0 +; + %fr1 = freeze i64 poison + %fr2 = freeze i64 poison + %ret = select i1 %cond, i64 %fr1, i64 %fr2 + ret i64 %ret +} + +define i64 @pr161492_2(i1 %cond) { +; CHECK-LABEL: define i64 @pr161492_2( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT: ret i64 0 +; + %fr = freeze i64 poison + %ret = select i1 %cond, i64 %fr, i64 %fr + ret i64 %ret +} + !0 = !{} !1 = !{i64 4} !2 = !{i32 0, i32 100} diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll index 0e5f046..e573108 100644 --- a/llvm/test/Transforms/InstCombine/funnel.ll +++ b/llvm/test/Transforms/InstCombine/funnel.ll @@ -635,3 +635,29 @@ define i32 @test_rotl_and_neg_wrong_mask(i32 %x, i32 %shamt) { %or = or i32 %shl, %shr ret i32 %or } + +declare void @use(i16) + +; Make sure the reused result does not produce poison. + +define i16 @fshl_concat_vector_may_produce_poison(i4 %x, i12 %y) { +; CHECK-LABEL: @fshl_concat_vector_may_produce_poison( +; CHECK-NEXT: [[X_FR:%.*]] = freeze i4 [[X:%.*]] +; CHECK-NEXT: [[ZEXT_X:%.*]] = zext i4 [[X_FR]] to i16 +; CHECK-NEXT: [[SLX:%.*]] = shl nuw i16 [[ZEXT_X]], 12 +; CHECK-NEXT: [[ZEXT_Y:%.*]] = zext i12 [[Y:%.*]] to i16 +; CHECK-NEXT: [[XY:%.*]] = or disjoint i16 [[SLX]], [[ZEXT_Y]] +; CHECK-NEXT: call void @use(i16 [[XY]]) +; CHECK-NEXT: [[YX:%.*]] = call i16 @llvm.fshl.i16(i16 [[XY]], i16 [[XY]], i16 4) +; CHECK-NEXT: ret i16 [[YX]] +; + %x.fr = freeze i4 %x + %zext.x = zext i4 %x.fr to i16 + %slx = shl nuw nsw i16 %zext.x, 12 + %zext.y = zext i12 %y to i16 + %xy = or disjoint i16 %slx, %zext.y + call void @use(i16 %xy) + %sly = shl nuw i16 %zext.y, 4 + %yx = or disjoint i16 %sly, %zext.x + ret i16 %yx +} diff --git a/llvm/test/Transforms/InstCombine/in-freeze-phi.ll b/llvm/test/Transforms/InstCombine/in-freeze-phi.ll new file mode 100644 index 0000000..917d81b --- /dev/null +++ b/llvm/test/Transforms/InstCombine/in-freeze-phi.ll @@ -0,0 +1,274 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +define i32 @phi_freeze_same_consts(i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_same_consts( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret i32 42 +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ 42, %cA ], [ 42, %cB ] + ret i32 %phi +} + +define i32 @phi_freeze_mixed_consts(i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_mixed_consts( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB_FREEZE]] ], [ 42, %[[CA]] ], [ 7, %[[CB]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ 42, %cA ], [ 7, %cB ] + ret i32 %phi +} + +define i32 @phi_freeze_with_nonconst_incoming(i32 %x, i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_with_nonconst_incoming( +; CHECK-SAME: i32 [[X:%.*]], i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB_FREEZE]] ], [ [[X]], %[[CA]] ], [ 13, %[[CB]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ %x, %cA ], [ 13, %cB ] + ret i32 %phi +} + +define <4 x i8> @phi_freeze_vector(i1 %c0, i1 %c1) { +; CHECK-LABEL: define <4 x i8> @phi_freeze_vector( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret <4 x i8> splat (i8 9) +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze <4 x i8> undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB + +cA: + br label %final + +cB: + br label %final + +final: + %phi = phi <4 x i8> [ %f, %bb_freeze ], + [<i8 9, i8 9, i8 9, i8 9>, %cA ], + [<i8 9, i8 9, i8 9, i8 9>, %cB ] + ret <4 x i8> %phi +} + +define i32 @multi_use_one_folds_one_not_zero(i1 %c0, i1 %c1, i1 %c2) { +; CHECK-LABEL: define i32 @multi_use_one_folds_one_not_zero( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_OTHER3:.*]], label %[[CC1:.*]] +; CHECK: [[BB_OTHER3]]: +; CHECK-NEXT: br label %[[MID:.*]] +; CHECK: [[CC1]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[MID]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[MID]] +; CHECK: [[MID]]: +; CHECK-NEXT: [[PHI_FOLD:%.*]] = phi i32 [ 0, %[[BB_OTHER3]] ], [ 1, %[[CA]] ], [ 1, %[[CB]] ] +; CHECK-NEXT: br i1 [[C2]], label %[[BB_FREEZE2:.*]], label %[[CD:.*]] +; CHECK: [[BB_FREEZE2]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER2:.*:]] +; CHECK-NEXT: br i1 true, label %[[CA]], label %[[CB]] +; CHECK: [[CC:.*:]] +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CD]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret i32 [[PHI_FOLD]] +; +entry: + %f = freeze i32 undef + br i1 %c0, label %bb_freeze, label %bb_other +bb_freeze: + br label %mid +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %mid +cB: + br label %mid +mid: + %phi_no_fold = phi i32 [ %f, %bb_freeze ], [ 1, %cA ], [ 1, %cB ] + br i1 %c2, label %bb_freeze2, label %cD +bb_freeze2: + br label %final +bb_other2: + br i1 %c1, label %cA, label %cB +cC: + br label %final +cD: + br label %final +final: + %phi_fold = phi i32 [ %f, %bb_freeze2 ], [ 0, %cC ], [ 0, %cD ] + %a = add i32 %phi_fold, %phi_no_fold + ret i32 %a +} + +define i32 @phi_freeze_poison(i1 %c0, i1 %c1) { +; CHECK-LABEL: define i32 @phi_freeze_poison( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: ret i32 0 +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze i32 undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi i32 [ %f, %bb_freeze ], [ poison, %cA ], [ poison, %cB ] + ret i32 %phi +} + +define <2 x i32> @phi_freeze_poison_vec(i1 %c0, i1 %c1) { +; CHECK-LABEL: define <2 x i32> @phi_freeze_poison_vec( +; CHECK-SAME: i1 [[C0:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[C0]], label %[[BB_FREEZE:.*]], label %[[BB_OTHER:.*]] +; CHECK: [[BB_FREEZE]]: +; CHECK-NEXT: br label %[[FINAL:.*]] +; CHECK: [[BB_OTHER]]: +; CHECK-NEXT: br i1 [[C1]], label %[[CA:.*]], label %[[CB:.*]] +; CHECK: [[CA]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[CB]]: +; CHECK-NEXT: br label %[[FINAL]] +; CHECK: [[FINAL]]: +; CHECK-NEXT: [[PHI:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB_FREEZE]] ], [ <i32 poison, i32 1>, %[[CA]] ], [ <i32 poison, i32 1>, %[[CB]] ] +; CHECK-NEXT: ret <2 x i32> [[PHI]] +; +entry: + br i1 %c0, label %bb_freeze, label %bb_other + +bb_freeze: + %f = freeze <2 x i32> undef + br label %final + +bb_other: + br i1 %c1, label %cA, label %cB +cA: + br label %final +cB: + br label %final + +final: + %phi = phi <2 x i32> [ %f, %bb_freeze ], [ <i32 poison, i32 1>, %cA ], [ <i32 poison, i32 1>, %cB ] + ret <2 x i32> %phi +} diff --git a/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check-dl.ll b/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check-dl.ll new file mode 100644 index 0000000..14a4c95 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check-dl.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 +; RUN: opt -passes=loop-idiom -S %s | FileCheck %s + +target datalayout = "p:16:16" + +;. +; CHECK: @.crctable = private constant [256 x i32] zeroinitializer +;. +define void @test_with_dl() { +; CHECK-LABEL: define void @test_with_dl() { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[PH:.*]] +; CHECK: [[PH_LOOPEXIT:.*]]: +; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i32 [ [[CRC_NEXT3:%.*]], %[[LOOP:.*]] ] +; CHECK-NEXT: br label %[[PH]] +; CHECK: [[PH]]: +; CHECK-NEXT: [[CRC_USE:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[CRC_NEXT_LCSSA]], %[[PH_LOOPEXIT]] ] +; CHECK-NEXT: br label %[[LOOP]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[CRC2:%.*]] = phi i32 [ 0, %[[PH]] ], [ [[CRC_NEXT3]], %[[LOOP]] ] +; CHECK-NEXT: [[INDEXER_LO:%.*]] = trunc i32 [[CRC2]] to i8 +; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[INDEXER_LO]] to i16 +; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i32, ptr @.crctable, i16 [[INDEXER_EXT]] +; CHECK-NEXT: [[TBL_LD:%.*]] = load i32, ptr [[TBL_PTRADD]], align 4 +; CHECK-NEXT: [[CRC_LE_SHIFT:%.*]] = lshr i32 [[CRC2]], 8 +; CHECK-NEXT: [[CRC_NEXT3]] = xor i32 [[CRC_LE_SHIFT]], [[TBL_LD]] +; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i16 [[IV]], 0 +; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP]], label %[[PH_LOOPEXIT]] +; +entry: + br label %ph + +ph: + %crc.use = phi i32 [ 0, %entry ], [ %crc.next, %loop ] + br label %loop + +loop: + %iv = phi i16 [ 0, %ph ], [ %iv.next, %loop ] + %crc = phi i32 [ 0, %ph ], [ %crc.next, %loop ] + %lshr.crc.1 = lshr i32 %crc, 1 + %crc.and.1 = and i32 %crc, 1 + %sb.check = icmp eq i32 %crc.and.1, 0 + %xor = xor i32 %lshr.crc.1, 0 + %crc.next = select i1 %sb.check, i32 %lshr.crc.1, i32 %xor + %iv.next = add i16 %iv, 1 + %exit.cond = icmp ult i16 %iv, 7 + br i1 %exit.cond, label %loop, label %ph +} diff --git a/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check.ll b/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check.ll index 51dc142..b2ec53c 100644 --- a/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check.ll +++ b/llvm/test/Transforms/LoopIdiom/cyclic-redundancy-check.ll @@ -118,8 +118,8 @@ define i16 @crc16.le.tc16(i16 %msg, i16 %checksum) { ; CHECK-NEXT: [[IV_INDEXER:%.*]] = zext i8 [[IV_BITS]] to i16 ; CHECK-NEXT: [[DATA_INDEXER:%.*]] = lshr i16 [[MSG]], [[IV_INDEXER]] ; CHECK-NEXT: [[CRC_DATA_INDEXER:%.*]] = xor i16 [[DATA_INDEXER]], [[CRC2]] -; CHECK-NEXT: [[INDEXER_LO:%.*]] = and i16 [[CRC_DATA_INDEXER]], 255 -; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_LO]] to i64 +; CHECK-NEXT: [[INDEXER_LO:%.*]] = trunc i16 [[CRC_DATA_INDEXER]] to i8 +; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[INDEXER_LO]] to i64 ; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable.2, i64 [[INDEXER_EXT]] ; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2 ; CHECK-NEXT: [[CRC_LE_SHIFT:%.*]] = lshr i16 [[CRC2]], 8 @@ -166,8 +166,8 @@ define i8 @crc8.le.tc16(i16 %msg, i8 %checksum) { ; CHECK-NEXT: [[DATA_INDEXER:%.*]] = lshr i16 [[MSG]], [[IV_INDEXER]] ; CHECK-NEXT: [[CRC_INDEXER_CAST:%.*]] = zext i8 [[CRC2]] to i16 ; CHECK-NEXT: [[CRC_DATA_INDEXER:%.*]] = xor i16 [[DATA_INDEXER]], [[CRC_INDEXER_CAST]] -; CHECK-NEXT: [[INDEXER_LO:%.*]] = and i16 [[CRC_DATA_INDEXER]], 255 -; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_LO]] to i64 +; CHECK-NEXT: [[INDEXER_LO:%.*]] = trunc i16 [[CRC_DATA_INDEXER]] to i8 +; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[INDEXER_LO]] to i64 ; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i8, ptr @.crctable.3, i64 [[INDEXER_EXT]] ; CHECK-NEXT: [[TBL_LD]] = load i8, ptr [[TBL_PTRADD]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1 @@ -212,8 +212,8 @@ define i16 @crc16.be.tc8.crc.init.li(i16 %checksum, i8 %msg) { ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[CRC2:%.*]] = phi i16 [ [[CRC_INIT]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[INDEXER_HI:%.*]] = lshr i16 [[CRC2]], 8 -; CHECK-NEXT: [[INDEXER_HI_LO_BYTE:%.*]] = and i16 [[INDEXER_HI]], 255 -; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_HI_LO_BYTE]] to i64 +; CHECK-NEXT: [[INDEXER_HI_LO_BYTE:%.*]] = trunc i16 [[INDEXER_HI]] to i8 +; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[INDEXER_HI_LO_BYTE]] to i64 ; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable.4, i64 [[INDEXER_EXT]] ; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2 ; CHECK-NEXT: [[CRC_BE_SHIFT:%.*]] = shl i16 [[CRC2]], 8 @@ -255,8 +255,8 @@ define i16 @crc16.be.tc8.crc.init.arg(i16 %crc.init) { ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[CRC2:%.*]] = phi i16 [ [[CRC_INIT]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[INDEXER_HI:%.*]] = lshr i16 [[CRC2]], 8 -; CHECK-NEXT: [[INDEXER_HI_LO_BYTE:%.*]] = and i16 [[INDEXER_HI]], 255 -; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_HI_LO_BYTE]] to i64 +; CHECK-NEXT: [[INDEXER_HI_LO_BYTE:%.*]] = trunc i16 [[INDEXER_HI]] to i8 +; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[INDEXER_HI_LO_BYTE]] to i64 ; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable.5, i64 [[INDEXER_EXT]] ; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2 ; CHECK-NEXT: [[CRC_BE_SHIFT:%.*]] = shl i16 [[CRC2]], 8 @@ -295,8 +295,8 @@ define i16 @crc16.be.tc8.crc.init.arg.flipped.sb.check(i16 %crc.init) { ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[CRC2:%.*]] = phi i16 [ [[CRC_INIT]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[INDEXER_HI:%.*]] = lshr i16 [[CRC2]], 8 -; CHECK-NEXT: [[INDEXER_HI_LO_BYTE:%.*]] = and i16 [[INDEXER_HI]], 255 -; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_HI_LO_BYTE]] to i64 +; CHECK-NEXT: [[INDEXER_HI_LO_BYTE:%.*]] = trunc i16 [[INDEXER_HI]] to i8 +; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[INDEXER_HI_LO_BYTE]] to i64 ; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable.6, i64 [[INDEXER_EXT]] ; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2 ; CHECK-NEXT: [[CRC_BE_SHIFT:%.*]] = shl i16 [[CRC2]], 8 @@ -406,8 +406,8 @@ define i32 @crc32.le.tc8.data32(i32 %checksum, i32 %msg) { ; CHECK-NEXT: [[IV_INDEXER:%.*]] = zext i8 [[IV_BITS]] to i32 ; CHECK-NEXT: [[DATA_INDEXER:%.*]] = lshr i32 [[MSG]], [[IV_INDEXER]] ; CHECK-NEXT: [[CRC_DATA_INDEXER:%.*]] = xor i32 [[DATA_INDEXER]], [[CRC2]] -; CHECK-NEXT: [[INDEXER_LO:%.*]] = and i32 [[CRC_DATA_INDEXER]], 255 -; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i32 [[INDEXER_LO]] to i64 +; CHECK-NEXT: [[INDEXER_LO:%.*]] = trunc i32 [[CRC_DATA_INDEXER]] to i8 +; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[INDEXER_LO]] to i64 ; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i32, ptr @.crctable.8, i64 [[INDEXER_EXT]] ; CHECK-NEXT: [[TBL_LD:%.*]] = load i32, ptr [[TBL_PTRADD]], align 4 ; CHECK-NEXT: [[CRC_LE_SHIFT:%.*]] = lshr i32 [[CRC2]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll index 387bb43..2391842 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll @@ -81,17 +81,6 @@ define void @powi_call(ptr %P) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP]], align 8 -; CHECK-NEXT: [[POWI:%.*]] = tail call double @llvm.powi.f64.i32(double [[L]], i32 3) -; CHECK-NEXT: store double [[POWI]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 56a4683..6e3d257 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -33,20 +33,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[P_OUT_TAIL_09:%.*]] = phi ptr [ [[DST]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP19]] -; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[SHR3]] to i8 -; CHECK-NEXT: store i8 [[CONV4]], ptr [[P_OUT_TAIL_09]], align 1 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; @@ -108,20 +95,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[P_OUT_TAIL_09:%.*]] = phi ptr [ [[DST]], [[SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[SHR3:%.*]] = lshr i64 [[VAL]], [[TMP19]] -; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[SHR3]] to i8 -; CHECK-NEXT: store i8 [[CONV4]], ptr [[P_OUT_TAIL_09]], align 1 -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_OUT_TAIL_09]], i64 1 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index e4ee677..6cf11be 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -362,8 +362,9 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104 ; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br [[EXIT:label %.*]] -; PRED: [[SCALAR_PH:.*:]] +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[EXIT]]: +; PRED-NEXT: ret void ; entry: br label %loop @@ -585,8 +586,9 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true ; PRED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br [[EXIT:label %.*]] -; PRED: [[SCALAR_PH:.*:]] +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[EXIT]]: +; PRED-NEXT: ret void ; entry: br label %loop @@ -609,7 +611,6 @@ exit: } define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { -; ; COMMON-LABEL: define void @low_trip_count_fold_tail_scalarized_store( ; COMMON-SAME: ptr [[DST:%.*]]) { ; COMMON-NEXT: [[ENTRY:.*:]] @@ -659,16 +660,16 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1 ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; COMMON: [[PRED_STORE_CONTINUE12]]: -; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]] +; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT1:.*]] ; COMMON: [[PRED_STORE_IF13]]: ; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 ; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1 -; COMMON-NEXT: br label %[[EXIT]] -; COMMON: [[EXIT]]: -; COMMON-NEXT: br label %[[SCALAR_PH:.*]] -; COMMON: [[SCALAR_PH]]: -; COMMON-NEXT: br [[EXIT1:label %.*]] -; COMMON: [[SCALAR_PH1:.*:]] +; COMMON-NEXT: br label %[[EXIT1]] +; COMMON: [[EXIT1]]: +; COMMON-NEXT: br label %[[SCALAR_PH1:.*]] +; COMMON: [[SCALAR_PH1]]: +; COMMON-NEXT: br [[EXIT:label %.*]] +; COMMON: [[SCALAR_PH:.*:]] ; entry: br label %loop @@ -1160,8 +1161,9 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; PRED-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: -; PRED-NEXT: br [[EXIT:label %.*]] -; PRED: [[SCALAR_PH:.*:]] +; PRED-NEXT: br label %[[EXIT:.*]] +; PRED: [[EXIT]]: +; PRED-NEXT: ret void ; entry: br label %loop.header diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll index 1af55e9..71acac2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll @@ -65,36 +65,6 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds ; CHECK-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds double, ptr [[SRC_1]], i64 [[IV]] -; CHECK-NEXT: [[L_1:%.*]] = load double, ptr [[GEP_SRC_1]], align 8 -; CHECK-NEXT: [[ABS:%.*]] = tail call nnan double @llvm.fabs.f64(double [[L_1]]) -; CHECK-NEXT: [[C_0:%.*]] = fcmp olt double [[ABS]], 1.000000e+00 -; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[L_2:%.*]] = load double, ptr [[SRC_2]], align 8 -; CHECK-NEXT: [[IV_SUB_1:%.*]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[GEP_IV_SUB_1:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[IV_SUB_1]] -; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_IV_SUB_1]], align 8 -; CHECK-NEXT: [[C_1:%.*]] = fcmp oeq double [[L_2]], 0.000000e+00 -; CHECK-NEXT: br i1 [[C_1]], label %[[MERGE:.*]], label %[[LOOP_LATCH]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[IV_SUB_2:%.*]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[GEP_IV_SUB_2:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[IV_SUB_2]] -; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_IV_SUB_2]], align 8 -; CHECK-NEXT: br label %[[MERGE]] -; CHECK: [[MERGE]]: -; CHECK-NEXT: [[MERGE_IV:%.*]] = phi i64 [ [[IV_SUB_2]], %[[ELSE]] ], [ [[IV_SUB_1]], %[[THEN]] ] -; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i64 [[MERGE_IV]] -; CHECK-NEXT: store i32 10, ptr [[GEP_DST_1]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll index 890ff1d..4bb8a0e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll @@ -69,20 +69,7 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SEXT:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV_NEXT]] -; CHECK-NEXT: [[FOR_NEXT]] = load i16, ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV_NEXT]] -; CHECK-NEXT: store i32 [[SEXT]], ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1001 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index db088f8..bfee39ea 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -18,21 +18,8 @@ define double @test_reduction_costs() { ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_1:.*]] -; CHECK: [[LOOP_1]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_1]] ] -; CHECK-NEXT: [[R_1:%.*]] = phi double [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[R_1_NEXT:%.*]], %[[LOOP_1]] ] -; CHECK-NEXT: [[R_2:%.*]] = phi double [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[R_2_NEXT:%.*]], %[[LOOP_1]] ] -; CHECK-NEXT: [[R_1_NEXT]] = fadd double [[R_1]], 3.000000e+00 -; CHECK-NEXT: [[R_2_NEXT]] = fadd double [[R_2]], 9.000000e+00 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_1]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[R_1_NEXT_LCSSA:%.*]] = phi double [ [[R_1_NEXT]], %[[LOOP_1]] ], [ [[TMP0]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[R_2_NEXT_LCSSA:%.*]] = phi double [ [[R_2_NEXT]], %[[LOOP_1]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[DIV:%.*]] = fmul double [[R_1_NEXT_LCSSA]], [[R_2_NEXT_LCSSA]] +; CHECK-NEXT: [[DIV:%.*]] = fmul double [[TMP0]], [[TMP1]] ; CHECK-NEXT: ret double [[DIV]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index a74c33f..42a1940 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -169,22 +169,9 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) { ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[RECUR_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4 -; CHECK-NEXT: [[RECUR_NEXT]] = zext i32 [[L]] to i64 -; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[RECUR_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RECUR_LCSSA]] +; CHECK-NEXT: ret i64 [[VECTOR_RECUR_EXTRACT_FOR_PHI]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll index f1571e6..d80fdd1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll @@ -51,22 +51,8 @@ define i32 @test_invariant_replicate_region(i32 %x, i1 %c) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3 ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[REM_1:%.*]] = urem i32 10, [[X]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, %[[LOOP_HEADER]] ], [ [[REM_1]], %[[THEN]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 99 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], %[[LOOP_LATCH]] ], [ [[TMP17]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP17]] ; entry: br label %loop.header diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index dd8bd27..e424649 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -474,19 +474,8 @@ define i32 @tc4(ptr noundef readonly captures(none) %tmp) vscale_range(1,16) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_0179:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP5]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ADD_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP4]] ; entry: br label %for.body @@ -520,6 +509,7 @@ define i32 @tc4_from_profile(ptr noundef readonly captures(none) %tmp, i64 %N) v ; CHECK-NEXT: [[ADD]] = add i32 [[SUM_0179]], [[TMP0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll index 80bf956..9f518e4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll @@ -62,18 +62,8 @@ define i32 @add_reduction_select_operand_constant_but_non_uniform() { ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[ADD2_REASS:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 42, %[[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[ADD2_REASS]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[RDX_NEXT]] = add i32 0, [[RDX]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD2_REASS]], 64 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ADD_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll index 544ef5c..a6e0f8a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/multiple-result-intrinsics.ll @@ -32,14 +32,7 @@ define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 ; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 ; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 ; CHECK: [[EXIT:.*:]] ; ; CHECK-ARMPL-LABEL: define void @sincos_f32( @@ -112,14 +105,7 @@ define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 ; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 ; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 ; CHECK: [[EXIT:.*:]] ; ; CHECK-ARMPL-LABEL: define void @sincos_f64( @@ -209,15 +195,6 @@ define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly ; CHECK-ARMPL: [[TMP17:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP15]], 1 ; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP16]], ptr [[TMP19:%.*]], i32 4, <vscale x 4 x i1> [[TMP14:%.*]]) ; CHECK-ARMPL: call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[TMP17]], ptr [[TMP21:%.*]], i32 4, <vscale x 4 x i1> [[TMP14]]) -; CHECK-ARMPL: [[MIDDLE_BLOCK:.*:]] -; CHECK-ARMPL: [[SCALAR_PH:.*:]] -; CHECK-ARMPL: [[FOR_BODY:.*:]] -; CHECK-ARMPL: [[IF_THEN:.*:]] -; CHECK-ARMPL: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK-ARMPL: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK-ARMPL: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK-ARMPL: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK-ARMPL: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 ; CHECK-ARMPL: [[IF_MERGE:.*:]] ; CHECK-ARMPL: [[FOR_END:.*:]] ; @@ -277,14 +254,7 @@ define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 ; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 ; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.modf.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 ; CHECK: [[EXIT:.*:]] ; ; CHECK-ARMPL-LABEL: define void @modf_f32( @@ -357,14 +327,7 @@ define void @modf_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 ; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 ; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.modf.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 ; CHECK: [[EXIT:.*:]] ; ; CHECK-ARMPL-LABEL: define void @modf_f64( @@ -441,14 +404,7 @@ define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 ; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 ; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincospi.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 ; CHECK: [[EXIT:.*:]] ; ; CHECK-ARMPL-LABEL: define void @sincospi_f32( @@ -521,14 +477,7 @@ define void @sincospi_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 ; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 ; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincospi.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 ; CHECK: [[EXIT:.*:]] ; ; CHECK-ARMPL-LABEL: define void @sincospi_f64( diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index ff3f6e9..56ace54 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -30,17 +30,6 @@ define void @always_vectorize(ptr %p, i32 %x) { ; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] -; DEFAULT-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -59,17 +48,6 @@ define void @always_vectorize(ptr %p, i32 %x) { ; OPTSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] -; OPTSIZE-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -88,17 +66,6 @@ define void @always_vectorize(ptr %p, i32 %x) { ; MINSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] -; MINSIZE-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; @@ -390,23 +357,6 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[TMP72:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i8 -; DEFAULT-NEXT: [[MUL:%.*]] = mul i8 [[A]], [[TMP72]] -; DEFAULT-NEXT: [[SHR:%.*]] = lshr i8 [[TMP72]], 1 -; DEFAULT-NEXT: [[MUL5:%.*]] = mul i8 [[SHR]], [[B]] -; DEFAULT-NEXT: [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]] -; DEFAULT-NEXT: [[SHR7:%.*]] = lshr i8 [[TMP72]], 2 -; DEFAULT-NEXT: [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]] -; DEFAULT-NEXT: [[ADD10:%.*]] = add i8 [[ADD]], [[MUL9]] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; DEFAULT-NEXT: store i8 [[ADD10]], ptr [[ARRAYIDX]], align 1 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -531,23 +481,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[TMP26:%.*]] = trunc nuw nsw i64 [[IV]] to i8 -; DEFAULT-NEXT: [[MUL:%.*]] = mul i8 [[A]], [[TMP26]] -; DEFAULT-NEXT: [[SHR:%.*]] = lshr i8 [[TMP26]], 1 -; DEFAULT-NEXT: [[MUL5:%.*]] = mul i8 [[SHR]], [[B]] -; DEFAULT-NEXT: [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]] -; DEFAULT-NEXT: [[SHR7:%.*]] = lshr i8 [[TMP26]], 2 -; DEFAULT-NEXT: [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]] -; DEFAULT-NEXT: [[ADD10:%.*]] = add i8 [[ADD]], [[MUL9]] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IV]] -; DEFAULT-NEXT: store i8 [[ADD10]], ptr [[ARRAYIDX]], align 1 -; DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 15 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -598,23 +531,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[TMP26:%.*]] = trunc nuw nsw i64 [[IV]] to i8 -; OPTSIZE-NEXT: [[MUL:%.*]] = mul i8 [[A]], [[TMP26]] -; OPTSIZE-NEXT: [[SHR:%.*]] = lshr i8 [[TMP26]], 1 -; OPTSIZE-NEXT: [[MUL5:%.*]] = mul i8 [[SHR]], [[B]] -; OPTSIZE-NEXT: [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]] -; OPTSIZE-NEXT: [[SHR7:%.*]] = lshr i8 [[TMP26]], 2 -; OPTSIZE-NEXT: [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]] -; OPTSIZE-NEXT: [[ADD10:%.*]] = add i8 [[ADD]], [[MUL9]] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IV]] -; OPTSIZE-NEXT: store i8 [[ADD10]], ptr [[ARRAYIDX]], align 1 -; OPTSIZE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 15 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -665,23 +581,6 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[TMP26:%.*]] = trunc nuw nsw i64 [[IV]] to i8 -; MINSIZE-NEXT: [[MUL:%.*]] = mul i8 [[A]], [[TMP26]] -; MINSIZE-NEXT: [[SHR:%.*]] = lshr i8 [[TMP26]], 1 -; MINSIZE-NEXT: [[MUL5:%.*]] = mul i8 [[SHR]], [[B]] -; MINSIZE-NEXT: [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]] -; MINSIZE-NEXT: [[SHR7:%.*]] = lshr i8 [[TMP26]], 2 -; MINSIZE-NEXT: [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]] -; MINSIZE-NEXT: [[ADD10:%.*]] = add i8 [[ADD]], [[MUL9]] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IV]] -; MINSIZE-NEXT: store i8 [[ADD10]], ptr [[ARRAYIDX]], align 1 -; MINSIZE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 15 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; @@ -746,23 +645,6 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; DEFAULT-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; DEFAULT-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; DEFAULT-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -789,23 +671,6 @@ define void @dont_vectorize_with_minsize() { ; OPTSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; OPTSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; OPTSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; OPTSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; OPTSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -832,23 +697,6 @@ define void @dont_vectorize_with_minsize() { ; MINSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; MINSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; MINSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; MINSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; MINSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; @@ -913,23 +761,6 @@ define void @vectorization_forced_minsize_reduce_width() { ; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; DEFAULT-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; DEFAULT-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; DEFAULT-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -956,23 +787,6 @@ define void @vectorization_forced_minsize_reduce_width() { ; OPTSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; OPTSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; OPTSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; OPTSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; OPTSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -999,23 +813,6 @@ define void @vectorization_forced_minsize_reduce_width() { ; MINSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; MINSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; MINSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; MINSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; MINSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll index 24375dd..dd239c0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll @@ -28,7 +28,8 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK: middle.block: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: for.exit: +; CHECK-NEXT: ret i32 [[TMP11]] ; entry: br label %for.body @@ -80,7 +81,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[IV_NEXT]] @@ -111,7 +112,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK-NEXT: [[TMP13]] = add <4 x i32> [[TMP14]], [[VEC_PHI9]] ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX9]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] @@ -135,7 +136,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK-NEXT: [[CMP_IV_NEG:%.*]] = icmp ugt i64 [[IV_NEG]], 0 ; CHECK-NEXT: [[CMP_IV:%.*]] = icmp ne i64 [[ACCUM1]], -1 ; CHECK-NEXT: [[EXITCOND:%.*]] = and i1 [[CMP_IV_NEG]], [[CMP_IV]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_BODY1]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: while.end.loopexit: ; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY1]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret void @@ -494,11 +495,12 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret i32 [[TMP182]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index 43fccdc..49e9989 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -261,7 +261,8 @@ define i32 @sudot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: for.exit: +; CHECK-NEXT: ret i32 [[TMP13]] ; ; CHECK-NOI8MM-LABEL: define i32 @sudot_neon( ; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR1:[0-9]+]] { @@ -296,7 +297,8 @@ define i32 @sudot_neon(ptr %a, ptr %b) #1 { ; CHECK-NOI8MM-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP13]], [[TMP12]] ; CHECK-NOI8MM-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) ; CHECK-NOI8MM-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-NOI8MM: scalar.ph: +; CHECK-NOI8MM: for.exit: +; CHECK-NOI8MM-NEXT: ret i32 [[TMP15]] ; entry: br label %for.body @@ -349,12 +351,13 @@ define i32 @usdot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[PARTIAL_REDUCE5]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: for.exit: +; CHECK-NEXT: ret i32 [[TMP13]] ; ; CHECK-NOI8MM-LABEL: define i32 @usdot_neon( ; CHECK-NOI8MM-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR1]] { @@ -384,12 +387,13 @@ define i32 @usdot_neon(ptr %a, ptr %b) #1 { ; CHECK-NOI8MM-NEXT: [[TMP13]] = add <16 x i32> [[TMP11]], [[VEC_PHI1]] ; CHECK-NOI8MM-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NOI8MM-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NOI8MM-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NOI8MM-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-NOI8MM: middle.block: ; CHECK-NOI8MM-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP13]], [[TMP12]] ; CHECK-NOI8MM-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) ; CHECK-NOI8MM-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-NOI8MM: scalar.ph: +; CHECK-NOI8MM: for.exit: +; CHECK-NOI8MM-NEXT: ret i32 [[TMP15]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll index 410993b..801eb81 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll @@ -30,7 +30,8 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVE1: scalar.ph: +; CHECK-INTERLEAVE1: for.exit: +; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP9]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @dotp( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { @@ -65,7 +66,8 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]] ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVED: scalar.ph: +; CHECK-INTERLEAVED: for.exit: +; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP14]] ; ; CHECK-MAXBW-LABEL: define i32 @dotp( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { @@ -90,7 +92,8 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-MAXBW: scalar.ph: +; CHECK-MAXBW: for.exit: +; CHECK-MAXBW-NEXT: ret i32 [[TMP9]] ; entry: br label %for.body @@ -196,11 +199,12 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: [[TMP69]] = add <16 x i32> [[TMP68]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVE1: scalar.ph: +; CHECK-INTERLEAVE1: for.exit: +; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP71]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_different_types( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -354,12 +358,13 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP138]] = add <16 x i32> [[TMP136]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-INTERLEAVED-NEXT: [[TMP139:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP139]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP139]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP138]], [[TMP137]] ; CHECK-INTERLEAVED-NEXT: [[TMP140:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVED: scalar.ph: +; CHECK-INTERLEAVED: for.exit: +; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP140]] ; ; CHECK-MAXBW-LABEL: define i32 @not_dotp_different_types( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -442,11 +447,12 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: [[TMP69]] = add <16 x i32> [[TMP68]], [[VEC_PHI]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-MAXBW-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) ; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-MAXBW: scalar.ph: +; CHECK-MAXBW: for.exit: +; CHECK-MAXBW-NEXT: ret i32 [[TMP71]] ; entry: br label %for.body @@ -491,11 +497,12 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = add <16 x i32> [[TMP7]], [[TMP8]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15 ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVE1: scalar.ph: +; CHECK-INTERLEAVE1: for.exit: +; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP11]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_loop_carried( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -517,11 +524,12 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = add <16 x i32> [[TMP7]], [[TMP8]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15 ; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVED: scalar.ph: +; CHECK-INTERLEAVED: for.exit: +; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP11]] ; ; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_loop_carried( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -543,11 +551,12 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = add <16 x i32> [[TMP7]], [[TMP8]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP9]], i32 15 ; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-MAXBW: scalar.ph: +; CHECK-MAXBW: for.exit: +; CHECK-MAXBW-NEXT: ret i32 [[TMP11]] ; entry: br label %for.body @@ -594,11 +603,12 @@ define i32 @not_dotp_not_phi(ptr %a, ptr noalias %b, ptr noalias %c) { ; CHECK-INTERLEAVE1-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP13]], align 4 ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15 ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVE1: scalar.ph: +; CHECK-INTERLEAVE1: for.exit: +; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP12]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_not_phi( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { @@ -622,11 +632,12 @@ define i32 @not_dotp_not_phi(ptr %a, ptr noalias %b, ptr noalias %c) { ; CHECK-INTERLEAVED-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP13]], align 4 ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15 ; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVED: scalar.ph: +; CHECK-INTERLEAVED: for.exit: +; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP12]] ; ; CHECK-MAXBW-LABEL: define i32 @not_dotp_not_phi( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { @@ -650,11 +661,12 @@ define i32 @not_dotp_not_phi(ptr %a, ptr noalias %b, ptr noalias %c) { ; CHECK-MAXBW-NEXT: store <16 x i32> [[TMP8]], ptr [[TMP13]], align 4 ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP7]], i32 15 ; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-MAXBW: scalar.ph: +; CHECK-MAXBW: for.exit: +; CHECK-MAXBW-NEXT: ret i32 [[TMP12]] ; entry: br label %for.body @@ -733,7 +745,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP31]]) ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE13]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE10]]) @@ -831,7 +843,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE10]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP50]]) ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE10]], [[PARTIAL_REDUCE13]] ; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -897,7 +909,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE13]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP31]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE13]]) ; CHECK-MAXBW-NEXT: [[TMP34:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE10]]) @@ -1292,11 +1304,12 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-INTERLEAVE1-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-INTERLEAVE1-NEXT: br label [[EXIT:%.*]] -; CHECK-INTERLEAVE1: scalar.ph: +; CHECK-INTERLEAVE1: exit: +; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP182]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @dotp_predicated( ; CHECK-INTERLEAVED-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -1627,11 +1640,12 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-INTERLEAVED-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-INTERLEAVED-NEXT: br label [[EXIT:%.*]] -; CHECK-INTERLEAVED: scalar.ph: +; CHECK-INTERLEAVED: exit: +; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP182]] ; ; CHECK-MAXBW-LABEL: define i32 @dotp_predicated( ; CHECK-MAXBW-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -1962,11 +1976,12 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-MAXBW-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP182:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-MAXBW-NEXT: br label [[EXIT:%.*]] -; CHECK-MAXBW: scalar.ph: +; CHECK-MAXBW: exit: +; CHECK-MAXBW-NEXT: ret i32 [[TMP182]] ; entry: br label %for.body @@ -2010,12 +2025,14 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVE1: scalar.ph: +; CHECK-INTERLEAVE1: for.exit: +; CHECK-INTERLEAVE1-NEXT: [[RESULT:%.*]] = add i32 [[TMP10]], [[TMP11]] +; CHECK-INTERLEAVE1-NEXT: ret i32 [[RESULT]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_extend_user( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -2045,13 +2062,15 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP14]] = add <16 x i32> [[TMP12]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP14]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = extractelement <16 x i32> [[TMP10]], i32 15 ; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVED: scalar.ph: +; CHECK-INTERLEAVED: for.exit: +; CHECK-INTERLEAVED-NEXT: [[RESULT:%.*]] = add i32 [[TMP16]], [[TMP17]] +; CHECK-INTERLEAVED-NEXT: ret i32 [[RESULT]] ; ; CHECK-MAXBW-LABEL: define i32 @not_dotp_extend_user( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -2072,12 +2091,14 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: [[TMP8]] = add <16 x i32> [[TMP7]], [[VEC_PHI]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]]) ; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 ; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-MAXBW: scalar.ph: +; CHECK-MAXBW: for.exit: +; CHECK-MAXBW-NEXT: [[RESULT:%.*]] = add i32 [[TMP10]], [[TMP11]] +; CHECK-MAXBW-NEXT: ret i32 [[RESULT]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 09917fc..6e11e55 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -501,7 +501,8 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVE1: scalar.ph: +; CHECK-INTERLEAVE1: for.exit: +; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP71]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @not_dotp_different_types( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -660,7 +661,8 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP138]], [[TMP137]] ; CHECK-INTERLEAVED-NEXT: [[TMP142:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVED: scalar.ph: +; CHECK-INTERLEAVED: for.exit: +; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP142]] ; ; CHECK-MAXBW-LABEL: define i32 @not_dotp_different_types( ; CHECK-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -747,7 +749,8 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP138]]) ; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-MAXBW: scalar.ph: +; CHECK-MAXBW: for.exit: +; CHECK-MAXBW-NEXT: ret i32 [[TMP71]] ; entry: br label %for.body @@ -800,7 +803,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = add <vscale x 8 x i32> [[TMP16]], [[TMP17]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP20]], 8 @@ -848,7 +851,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = add <vscale x 8 x i32> [[TMP25]], [[TMP26]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul nuw i32 [[TMP29]], 8 @@ -890,7 +893,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[TMP27:%.*]] = add <vscale x 8 x i32> [[TMP25]], [[TMP26]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = mul nuw i32 [[TMP20]], 8 @@ -949,7 +952,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = add <vscale x 8 x i32> [[TMP16]], [[TMP15]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = mul nuw i32 [[TMP23]], 8 @@ -987,7 +990,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = add <vscale x 8 x i32> [[TMP30]], [[TMP22]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = mul nuw i32 [[TMP27]], 8 @@ -1019,7 +1022,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = add <vscale x 8 x i32> [[TMP20]], [[TMP19]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-MAXBW-NEXT: [[TMP24:%.*]] = mul nuw i32 [[TMP23]], 8 @@ -1108,7 +1111,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP41]] = add <vscale x 4 x i32> [[TMP40]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] ; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP43:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP41]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP35]]) @@ -1226,7 +1229,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP81]] = add <vscale x 4 x i32> [[TMP79]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP18]] ; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 4 x i32> [[TMP81]], [[TMP80]] ; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]]) @@ -1296,7 +1299,7 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE16]] = call <vscale x 2 x i32> @llvm.vector.partial.reduce.add.nxv2i32.nxv8i32(<vscale x 2 x i32> [[VEC_PHI4]], <vscale x 8 x i32> [[TMP73]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP74:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE16]]) ; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE17]]) @@ -1393,11 +1396,12 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]]) ; CHECK-INTERLEAVE1-NEXT: br label [[EXIT:%.*]] -; CHECK-INTERLEAVE1: scalar.ph: +; CHECK-INTERLEAVE1: exit: +; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP22]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @dotp_predicated( ; CHECK-INTERLEAVED-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -1430,11 +1434,12 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = xor i1 [[TMP20]], true -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]]) ; CHECK-INTERLEAVED-NEXT: br label [[EXIT:%.*]] -; CHECK-INTERLEAVED: scalar.ph: +; CHECK-INTERLEAVED: exit: +; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP22]] ; ; CHECK-MAXBW-LABEL: define i32 @dotp_predicated( ; CHECK-MAXBW-SAME: i64 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -1467,11 +1472,12 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = xor i1 [[TMP19]], true -; CHECK-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-MAXBW-NEXT: br label [[EXIT:%.*]] -; CHECK-MAXBW: scalar.ph: +; CHECK-MAXBW: exit: +; CHECK-MAXBW-NEXT: ret i32 [[TMP21]] ; entry: br label %for.body @@ -1519,7 +1525,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP14]] = add <vscale x 4 x i32> [[TMP13]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() @@ -1566,7 +1572,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP24]] = add <vscale x 4 x i32> [[TMP22]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 4 x i32> [[TMP24]], [[TMP23]] ; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]]) @@ -1601,7 +1607,7 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[TMP24]] = add <vscale x 8 x i32> [[TMP22]], [[VEC_PHI1]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP24]]) ; CHECK-MAXBW-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() @@ -1660,7 +1666,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP15]] = add <vscale x 2 x i64> [[VEC_PHI]], [[TMP14]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP15]]) ; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 41, [[N_VEC]] @@ -1707,7 +1713,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP25]] = add <vscale x 2 x i64> [[VEC_PHI1]], [[TMP23]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 2 x i64> [[TMP25]], [[TMP24]] ; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[BIN_RDX]]) @@ -1742,7 +1748,7 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[TMP14]] = add <vscale x 8 x i64> [[VEC_PHI]], [[TMP13]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP14]]) ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 41, [[N_VEC]] @@ -1860,7 +1866,7 @@ define void @not_dotp_not_phi2(ptr %matrix, i32 %n) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP23]] = add i32 [[TMP21]], [[TMP15]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP23]], [[TMP22]] ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] @@ -1972,7 +1978,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i64> [[TMP4]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) ; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -2010,7 +2016,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <8 x i64> [[TMP7]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i64> [[TMP9]], [[TMP8]] ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[BIN_RDX]]) @@ -2047,7 +2053,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 4 x i64> [[TMP10]], [[VEC_PHI]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> [[TMP11]]) ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -2105,7 +2111,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i64> [[TMP4]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) ; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -2143,7 +2149,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <8 x i64> [[TMP7]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i64> [[TMP9]], [[TMP8]] ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[BIN_RDX]]) @@ -2180,7 +2186,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 { ; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 4 x i64> [[TMP10]], [[VEC_PHI]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> [[TMP11]]) ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -2247,7 +2253,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP18]] = add <vscale x 2 x i64> [[TMP17]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP18]]) ; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] @@ -2301,7 +2307,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP28]] = add <vscale x 2 x i64> [[TMP26]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <vscale x 2 x i64> [[TMP28]], [[TMP27]] ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[BIN_RDX]]) @@ -2343,7 +2349,7 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 { ; CHECK-MAXBW-NEXT: [[TMP20]] = add <vscale x 8 x i64> [[TMP17]], [[VEC_PHI]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.add.nxv8i64(<vscale x 8 x i64> [[TMP20]]) ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] @@ -2465,7 +2471,7 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum, ; CHECK-INTERLEAVE1-NEXT: [[TMP36]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP28]]) ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP38:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP36]]) ; CHECK-INTERLEAVE1-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP33]]) @@ -2565,7 +2571,7 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum, ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE21]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP28]]) ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE21]]) ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE20]]) @@ -2665,7 +2671,7 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum, ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE21]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP28]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-MAXBW-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP30:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE21]]) ; CHECK-MAXBW-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE20]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index 1ef5b20..db3166c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -499,7 +499,8 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 { ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]]) ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVE1: scalar.ph: +; CHECK-INTERLEAVE1: for.exit: +; CHECK-INTERLEAVE1-NEXT: ret i32 [[TMP13]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @zext_add_reduc_i8_i32_predicated( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]]) #[[ATTR0]] { @@ -527,7 +528,8 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 { ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]]) ; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-INTERLEAVED: scalar.ph: +; CHECK-INTERLEAVED: for.exit: +; CHECK-INTERLEAVED-NEXT: ret i32 [[TMP13]] ; ; CHECK-MAXBW-LABEL: define i32 @zext_add_reduc_i8_i32_predicated( ; CHECK-MAXBW-SAME: ptr [[A:%.*]]) #[[ATTR0]] { @@ -555,7 +557,8 @@ define i32 @zext_add_reduc_i8_i32_predicated(ptr %a) #0 { ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-MAXBW-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK-MAXBW: scalar.ph: +; CHECK-MAXBW: for.exit: +; CHECK-MAXBW-NEXT: ret i32 [[TMP12]] ; entry: br label %for.body @@ -674,7 +677,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVE1-NEXT: [[TMP4]] = sub <16 x i32> [[VEC_PHI]], [[TMP3]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP4]]) ; CHECK-INTERLEAVE1-NEXT: br label [[SCALAR_PH:%.*]] @@ -700,7 +703,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[TMP7]] = sub <16 x i32> [[VEC_PHI1]], [[TMP5]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP7]], [[TMP6]] ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) @@ -726,7 +729,7 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-MAXBW-NEXT: [[TMP10]] = sub <vscale x 8 x i32> [[VEC_PHI]], [[TMP9]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP10]]) ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] @@ -768,7 +771,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP4]] = add <16 x i32> [[TMP3]], [[VEC_PHI]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP4]]) ; CHECK-INTERLEAVE1-NEXT: br label [[SCALAR_PH:%.*]] @@ -794,7 +797,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP7]] = add <16 x i32> [[TMP5]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP7]], [[TMP6]] ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) @@ -820,7 +823,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] @@ -871,7 +874,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]]) ; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] @@ -906,7 +909,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP7]] = add <16 x i32> [[VEC_PHI2]], [[BROADCAST_SPLAT]] ; CHECK-INTERLEAVED-NEXT: [[TMP21]] = add nuw i32 [[VEC_PHI1]], 32 ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP21]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP7]], [[TMP6]] ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) @@ -942,7 +945,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-MAXBW-NEXT: [[TMP11]] = add <vscale x 16 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> [[TMP11]]) ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] @@ -993,7 +996,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP6]] = add <16 x i32> [[VEC_PHI]], [[TMP3]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-INTERLEAVE1: middle.block: ; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP6]]) ; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] @@ -1028,7 +1031,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <16 x i32> [[VEC_PHI2]], [[TMP3]] ; CHECK-INTERLEAVED-NEXT: [[TMP22]] = add nuw i32 [[VEC_PHI1]], 32 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP22]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP8]], [[TMP7]] ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) @@ -1064,7 +1067,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-MAXBW-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-MAXBW: middle.block: ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE]]) ; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll index c4feabe..edf7e28 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr73894.ll @@ -50,22 +50,9 @@ define i32 @pr70988(ptr %src, i32 %n) { ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP17]], i32 [[TMP18]]) -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDUC:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDUC_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDUC]] -; CHECK-NEXT: [[TMP22:%.*]] = load ptr, ptr [[GEP]], align 8 -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24]] = tail call i32 @llvm.smax.i32(i32 [[TMP23]], i32 [[MAX]]) -; CHECK-NEXT: [[INDUC_NEXT]] = add nuw nsw i64 [[INDUC]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDUC_NEXT]], [[UMAX]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[TMP24]], [[LOOP]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; entry: %1 = and i32 %n, 15 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 0c7dc29..0f82de62 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -241,42 +241,8 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP41]]) ; PRED-NEXT: br label %[[EXIT:.*]] -; PRED: [[SCALAR_PH:.*]]: -; PRED-NEXT: br label %[[LOOP:.*]] -; PRED: [[LOOP]]: -; PRED-NEXT: [[TMP45:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[TMP53:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[SCALAR_RECUR10:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[TMP45]], %[[LOOP]] ] -; PRED-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[SUM_RED:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[TMP52:%.*]] = add i64 [[Y]], 1 -; PRED-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[SRC_1]], i64 [[TMP52]] -; PRED-NEXT: [[TMP53]] = load i32, ptr [[GEP_1]], align 4 -; PRED-NEXT: [[OR3:%.*]] = or i32 [[SCALAR_RECUR10]], [[X]] -; PRED-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1 -; PRED-NEXT: [[SHR:%.*]] = lshr i32 [[X]], 1 -; PRED-NEXT: [[TMP54:%.*]] = shl i32 [[OR3]], 1 -; PRED-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 2 -; PRED-NEXT: [[SHL19:%.*]] = shl i32 [[X]], 1 -; PRED-NEXT: [[TMP56:%.*]] = or i32 [[SHR]], [[SHL19]] -; PRED-NEXT: [[TMP57:%.*]] = or i32 [[TMP56]], [[TMP55]] -; PRED-NEXT: [[TMP58:%.*]] = or i32 [[TMP57]], [[X]] -; PRED-NEXT: [[OR20:%.*]] = or i32 [[Z]], [[X]] -; PRED-NEXT: [[NOT:%.*]] = and i32 [[OR20]], 1 -; PRED-NEXT: [[AND:%.*]] = xor i32 [[NOT]], 1 -; PRED-NEXT: [[IDX_EXT_1:%.*]] = zext i32 [[AND]] to i64 -; PRED-NEXT: [[GEP_2:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[IDX_EXT_1]] -; PRED-NEXT: [[TMP59:%.*]] = load i32, ptr [[GEP_2]], align 4 -; PRED-NEXT: [[SHR24:%.*]] = lshr i32 [[TMP58]], 1 -; PRED-NEXT: [[IDX_EXT_2:%.*]] = zext i32 [[SHR24]] to i64 -; PRED-NEXT: [[GEP_3:%.*]] = getelementptr i32, ptr [[SRC_2]], i64 [[IDX_EXT_2]] -; PRED-NEXT: [[TMP60:%.*]] = load i32, ptr [[GEP_3]], align 4 -; PRED-NEXT: [[RED_1:%.*]] = or i32 [[TMP59]], [[SUM_RED]] -; PRED-NEXT: [[RED_2]] = or i32 [[RED_1]], [[TMP60]] -; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], [[Y]] -; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; PRED: [[EXIT]]: -; PRED-NEXT: [[RED_2_LCSSA:%.*]] = phi i32 [ [[RED_2]], %[[LOOP]] ], [ [[TMP44]], %[[MIDDLE_BLOCK]] ] -; PRED-NEXT: ret i32 [[RED_2_LCSSA]] +; PRED-NEXT: ret i32 [[TMP44]] ; entry: br label %loop @@ -497,21 +463,8 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 { ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: [[TMP19:%.*]] = call i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16> [[TMP16]]) ; PRED-NEXT: br label %[[EXIT:.*]] -; PRED: [[SCALAR_PH:.*]]: -; PRED-NEXT: br label %[[LOOP:.*]] -; PRED: [[LOOP]]: -; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[RED:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; PRED-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]] -; PRED-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; PRED-NEXT: [[DIV:%.*]] = udiv i16 [[L]], [[X]] -; PRED-NEXT: [[RED_NEXT]] = or i16 [[DIV]], [[RED]] -; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; PRED: [[EXIT]]: -; PRED-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i16 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP19]], %[[MIDDLE_BLOCK]] ] -; PRED-NEXT: ret i16 [[RED_NEXT_LCSSA]] +; PRED-NEXT: ret i16 [[TMP19]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index 885c7904..5072058 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -144,20 +144,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] -; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] -; CHECK-ORDERED-TF: for.body: -; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP12]], [[SUM_07]] -; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]] +; CHECK-ORDERED-TF-NEXT: ret float [[TMP9]] ; @@ -390,23 +378,11 @@ define float @fadd_strict_unroll(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT14]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP39]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP40:%.*]] = extractelement <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP41:%.*]] = xor i1 [[TMP40]], true -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] -; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] -; CHECK-ORDERED-TF: for.body: -; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP42:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD]] = fadd float [[TMP42]], [[SUM_07]] -; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP30]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: ret float [[ADD_LCSSA]] +; CHECK-ORDERED-TF-NEXT: ret float [[TMP30]] ; @@ -630,30 +606,12 @@ define void @fadd_strict_interleave(ptr noalias nocapture readonly %a, ptr noali ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] -; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] -; CHECK-ORDERED-TF: for.body: -; CHECK-ORDERED-TF-NEXT: [[ADD_PHI1:%.*]] = phi float [ [[A2]], [[SCALAR_PH:%.*]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[ADD_PHI2:%.*]] = phi float [ [[A1]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDXB1]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD1]] = fadd float [[TMP19]], [[ADD_PHI2]] -; CHECK-ORDERED-TF-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDXB2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OR]] -; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDXB2]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD2]] = fadd float [[TMP20]], [[ADD_PHI1]] -; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 2 -; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[ADD1_LCSSA:%.*]] = phi float [ [[ADD1]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: [[ADD2_LCSSA:%.*]] = phi float [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: store float [[ADD1_LCSSA]], ptr [[A]], align 4 -; CHECK-ORDERED-TF-NEXT: store float [[ADD2_LCSSA]], ptr [[ARRAYIDXA]], align 4 +; CHECK-ORDERED-TF-NEXT: store float [[TMP16]], ptr [[A]], align 4 +; CHECK-ORDERED-TF-NEXT: store float [[TMP14]], ptr [[ARRAYIDXA]], align 4 ; CHECK-ORDERED-TF-NEXT: ret void ; @@ -863,28 +821,13 @@ define float @fadd_of_sum(ptr noalias nocapture readonly %a, ptr noalias nocaptu ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP7]]) ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] -; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] -; CHECK-ORDERED-TF: for.body: -; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[RES_014:%.*]] = phi float [ [[RDX:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[SCALAR_PH]] ] -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ADD:%.*]] = fadd float [[TMP15]], [[TMP16]] -; CHECK-ORDERED-TF-NEXT: [[RDX]] = fadd float [[RES_014]], [[ADD]] -; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-ORDERED-TF: for.end.loopexit: -; CHECK-ORDERED-TF-NEXT: [[RDX_LCSSA:%.*]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[RDX_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP12]], [[FOR_END_LOOPEXIT]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[RES]] ; @@ -1081,31 +1024,11 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] -; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] -; CHECK-ORDERED-TF: for.body: -; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ 1.000000e+00, [[SCALAR_PH]] ], [ [[FADD:%.*]], [[FOR_INC]] ] -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP15]], 0.000000e+00 -; CHECK-ORDERED-TF-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; CHECK-ORDERED-TF: if.then: -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-ORDERED-TF-NEXT: br label [[FOR_INC]] -; CHECK-ORDERED-TF: for.inc: -; CHECK-ORDERED-TF-NEXT: [[PHI:%.*]] = phi float [ [[TMP16]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]] -; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: ret float [[RDX]] +; CHECK-ORDERED-TF-NEXT: ret float [[TMP12]] ; @@ -1245,7 +1168,7 @@ define float @fadd_multiple(ptr noalias nocapture %a, ptr noalias nocapture %b, ; CHECK-ORDERED-TF-NEXT: [[ADD3]] = fadd float [[ADD]], [[TMP1]] ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-ORDERED-TF: for.end: ; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[ADD3]], [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[RDX]] @@ -1542,25 +1465,11 @@ define float @fmuladd_strict(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = extractelement <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] -; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] -; CHECK-ORDERED-TF: for.body: -; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP56]], float [[TMP57]], float [[SUM_07]]) -; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]] +; CHECK-ORDERED-TF-NEXT: ret float [[TMP44]] ; @@ -1852,25 +1761,11 @@ define float @fmuladd_strict_fmf(ptr %a, ptr %b, i64 %n) #0 { ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT18]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP53]], i64 [[TMP6]]) ; CHECK-ORDERED-TF-NEXT: [[TMP54:%.*]] = extractelement <vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br label [[FOR_END:%.*]] -; CHECK-ORDERED-TF: scalar.ph: -; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] -; CHECK-ORDERED-TF: for.body: -; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP56:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP57:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-ORDERED-TF-NEXT: [[MULADD]] = tail call nnan float @llvm.fmuladd.f32(float [[TMP56]], float [[TMP57]], float [[SUM_07]]) -; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ] -; CHECK-ORDERED-TF-NEXT: ret float [[MULADD_LCSSA]] +; CHECK-ORDERED-TF-NEXT: ret float [[TMP44]] ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 4e989c5..3b016f8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -129,20 +129,8 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -203,21 +191,8 @@ define i64 @loop_contains_safe_call() #1 { ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -365,22 +340,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[LD2]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 79fb3fd..c775b44 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -88,16 +88,7 @@ define void @cost_store_i8(ptr %dst) #0 { ; PRED-NEXT: [[TMP12:%.*]] = xor i1 [[TMP14]], true ; PRED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PRED: middle.block: -; PRED-NEXT: br label [[EXIT:%.*]] -; PRED: scalar.ph: ; PRED-NEXT: br label [[LOOP:%.*]] -; PRED: loop: -; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; PRED-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] -; PRED-NEXT: store i8 0, ptr [[GEP]], align 1 -; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 100 -; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; PRED: exit: ; PRED-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll index 3f230b7..e084307 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll @@ -490,8 +490,7 @@ define float @fadd_predicated(ptr noalias nocapture %a, i64 %n) { ; CHECK-ORDERED: %[[MASK:.*]] = select <2 x i1> %0, <2 x float> %[[PHI]], <2 x float> splat (float -0.000000e+00) ; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.v2f32(float %[[RDX_PHI]], <2 x float> %[[MASK]]) ; CHECK-ORDERED: for.end: -; CHECK-ORDERED: %[[RES_PHI:.*]] = phi float [ %[[FADD:.*]], %for.body ], [ %[[RDX]], %middle.block ] -; CHECK-ORDERED: ret float %[[RES_PHI]] +; CHECK-ORDERED: ret float %[[RDX]] ; CHECK-UNORDERED-LABEL: @fadd_predicated ; CHECK-UNORDERED: vector.ph @@ -507,12 +506,8 @@ define float @fadd_predicated(ptr noalias nocapture %a, i64 %n) { ; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd ; CHECK-UNORDERED: middle.block ; CHECK-UNORDERED: %[[RDX:.*]] = call float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> %[[MASK]]) -; CHECK-UNORDERED: for.body -; CHECK-UNORDERED: %[[LOAD:.*]] = load float, ptr -; CHECK-UNORDERED: %[[FADD2:.*]] = fadd float {{.*}}, %[[LOAD]] ; CHECK-UNORDERED: for.end -; CHECK-UNORDERED: %[[SUM:.*]] = phi float [ %[[FADD2]], %for.body ], [ %[[RDX]], %middle.block ] -; CHECK-UNORDERED: ret float %[[SUM]] +; CHECK-UNORDERED: ret float %[[RDX]] ; CHECK-NOT-VECTORIZED-LABEL: @fadd_predicated ; CHECK-NOT-VECTORIZED-NOT: vector.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll index bdbbfdf..9526a84 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll @@ -31,10 +31,7 @@ define void @struct_return_widen(ptr noalias %in, ptr noalias writeonly %out_a, ; CHECK: [[VECTOR_BODY:.*:]] ; CHECK: [[TMP2:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD:%.*]]) ; CHECK: [[TMP3:%.*]] = call { <2 x half>, <2 x half> } @fixed_vec_foo(<2 x half> [[WIDE_LOAD1:%.*]]) -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR2:[0-9]+]] ; CHECK: [[EXIT:.*:]] ; entry: @@ -82,12 +79,9 @@ define void @struct_return_replicate(ptr noalias %in, ptr noalias writeonly %out ; CHECK: [[ENTRY:.*:]] ; CHECK: [[VECTOR_PH:.*:]] ; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[TMP2:%.*]] = tail call { half, half } @foo(half [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] -; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3:%.*]]) #[[ATTR3]] +; CHECK: [[TMP2:%.*]] = tail call { half, half } @foo(half [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] +; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3:%.*]]) #[[ATTR2]] ; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] -; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR3]] ; CHECK: [[EXIT:.*:]] ; entry: @@ -162,7 +156,7 @@ define void @struct_return_scalable(ptr noalias %in, ptr noalias writeonly %out_ ; CHECK: [[MIDDLE_BLOCK:.*:]] ; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR3]] +; CHECK: [[CALL:%.*]] = tail call { half, half } @foo(half [[IN_VAL:%.*]]) #[[ATTR2]] ; CHECK: [[EXIT:.*:]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 8d33ccb..bbc0e33 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -49,7 +49,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -59,7 +59,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 -; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -97,7 +97,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-VF8: vec.epilog.iter.check: ; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK-VF8: vec.epilog.ph: ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -107,7 +107,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 -; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: @@ -150,7 +150,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] @@ -182,13 +182,13 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-VF8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-VF8: vec.epilog.iter.check: ; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK-VF8: vec.epilog.ph: ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -198,7 +198,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 -; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: @@ -261,13 +261,13 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8 @@ -279,7 +279,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -313,13 +313,13 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-VF8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-VF8: vec.epilog.iter.check: ; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK-VF8: vec.epilog.ph: ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8 @@ -331,7 +331,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] -; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] ; CHECK-VF8-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -382,14 +382,14 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000 @@ -400,7 +400,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000 -; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -433,14 +433,14 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-VF8-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-VF8: vec.epilog.iter.check: ; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK-VF8: vec.epilog.ph: ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000 @@ -451,7 +451,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000 -; CHECK-VF8-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: @@ -514,13 +514,13 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 @@ -536,7 +536,7 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-NEXT: store <2 x float> [[TMP20]], ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -576,7 +576,7 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] @@ -606,18 +606,12 @@ exit: } ; Loop with vscale-based trip count vscale x 1024. -; TODO: No epilogue vectorizations should remain when choosing VF = vscale x 4. define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalias %b) vscale_range(1, 16) #0 { ; CHECK-LABEL: @trip_count_vscale_no_epilogue_iterations( -; CHECK-NEXT: iter.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024 -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 @@ -625,7 +619,7 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2 @@ -644,31 +638,11 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX4]] -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP18]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX4]] -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP19]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x float> [[WIDE_LOAD5]], [[WIDE_LOAD6]] -; CHECK-NEXT: store <2 x float> [[TMP20]], ptr [[TMP19]], align 4 -; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX4]], 2 -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N]] -; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] +; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; @@ -703,7 +677,7 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4 ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-VF8-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-VF8-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-VF8: middle.block: ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 33b3629..3b0bd87 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -116,7 +116,8 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP19]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] -; PREDICATED_TAIL_FOLDING: scalar.ph: +; PREDICATED_TAIL_FOLDING: for.end: +; PREDICATED_TAIL_FOLDING-NEXT: ret void ; entry: %conv = zext i8 %guard to i32 @@ -243,10 +244,11 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] -; PREDICATED_TAIL_FOLDING: scalar.ph: +; PREDICATED_TAIL_FOLDING: for.end: +; PREDICATED_TAIL_FOLDING-NEXT: ret void ; entry: %conv = zext i8 %guard to i32 @@ -377,10 +379,11 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT4]] -; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP6:![0-9]+]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] -; PREDICATED_TAIL_FOLDING: scalar.ph: +; PREDICATED_TAIL_FOLDING: for.end: +; PREDICATED_TAIL_FOLDING-NEXT: ret void ; entry: %conv = zext i8 %guard1 to i32 @@ -537,10 +540,11 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] -; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP23]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP8:![0-9]+]] +; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP23]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP5:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_END:%.*]] -; PREDICATED_TAIL_FOLDING: scalar.ph: +; PREDICATED_TAIL_FOLDING: for.end: +; PREDICATED_TAIL_FOLDING-NEXT: ret void ; entry: %conv = zext i8 %guard to i32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 16acd3f..b8b4fbd 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -69,7 +69,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll index 069d369..cb2c003 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -29,7 +29,8 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: +; CHECK: for.end: +; CHECK-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll index 61448bd..33ee0d6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll @@ -33,7 +33,10 @@ define void @cannot_overflow_i32_induction_var(ptr noalias %dst, ptr readonly %s ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; entry: %cmp6.not = icmp eq i32 %N, 0 @@ -87,10 +90,13 @@ define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void ; entry: %cmp6.not = icmp eq i64 %N, 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index b725669f..b5544dc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -36,21 +36,9 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[RED_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[RED_NEXT]] = add i32 [[RED]], [[VAL]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: while.end.loopexit: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP19]] ; ; CHECK-IN-LOOP-LABEL: @add_reduction_i32( ; CHECK-IN-LOOP-NEXT: entry: @@ -81,21 +69,9 @@ define i32 @add_reduction_i32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[TMP19:%.*]] = xor i1 [[TMP18]], true ; CHECK-IN-LOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-IN-LOOP: middle.block: -; CHECK-IN-LOOP-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK-IN-LOOP: scalar.ph: ; CHECK-IN-LOOP-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK-IN-LOOP: while.body: -; CHECK-IN-LOOP-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-IN-LOOP-NEXT: [[RED:%.*]] = phi i32 [ [[RED_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-IN-LOOP-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; CHECK-IN-LOOP-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-IN-LOOP-NEXT: [[RED_NEXT]] = add i32 [[RED]], [[VAL]] -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-IN-LOOP-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-IN-LOOP-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-IN-LOOP: while.end.loopexit: -; CHECK-IN-LOOP-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; CHECK-IN-LOOP-NEXT: ret i32 [[RED_NEXT_LCSSA]] +; CHECK-IN-LOOP-NEXT: ret i32 [[TMP15]] ; entry: br label %while.body @@ -141,23 +117,11 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ [[RED_NEXT:%.*]], [[WHILE_BODY]] ], [ 0.000000e+00, [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load float, ptr [[GEP]], align 4 -; CHECK-NEXT: [[RED_NEXT]] = fadd float [[RED]], [[VAL]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: while.end.loopexit: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RED_NEXT_LCSSA]] +; CHECK-NEXT: ret float [[TMP14]] ; ; CHECK-IN-LOOP-LABEL: @add_reduction_f32( ; CHECK-IN-LOOP-NEXT: entry: @@ -185,23 +149,11 @@ define float @add_reduction_f32(ptr %ptr, i64 %n) #0 { ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-IN-LOOP-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true -; CHECK-IN-LOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-IN-LOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-IN-LOOP: middle.block: -; CHECK-IN-LOOP-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK-IN-LOOP: scalar.ph: ; CHECK-IN-LOOP-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK-IN-LOOP: while.body: -; CHECK-IN-LOOP-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-IN-LOOP-NEXT: [[RED:%.*]] = phi float [ [[RED_NEXT:%.*]], [[WHILE_BODY]] ], [ 0.000000e+00, [[SCALAR_PH]] ] -; CHECK-IN-LOOP-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[PTR]], i64 [[INDEX]] -; CHECK-IN-LOOP-NEXT: [[VAL:%.*]] = load float, ptr [[GEP]], align 4 -; CHECK-IN-LOOP-NEXT: [[RED_NEXT]] = fadd float [[RED]], [[VAL]] -; CHECK-IN-LOOP-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-IN-LOOP-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-IN-LOOP-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-IN-LOOP: while.end.loopexit: -; CHECK-IN-LOOP-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], [[WHILE_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] -; CHECK-IN-LOOP-NEXT: ret float [[RED_NEXT_LCSSA]] +; CHECK-IN-LOOP-NEXT: ret float [[TMP14]] ; entry: br label %while.body @@ -251,32 +203,12 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP16]], true -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP20]]) -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 7, [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP26]], 5 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[RDX]], [[TMP27]] -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[RES]] = phi i32 [ [[RDX]], [[FOR_BODY]] ], [ [[XOR]], [[IF_THEN]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RES_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP25]] ; ; CHECK-IN-LOOP-LABEL: @cond_xor_reduction( ; CHECK-IN-LOOP-NEXT: entry: @@ -308,31 +240,11 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-IN-LOOP-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-IN-LOOP-NEXT: [[TMP22:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-IN-LOOP-NEXT: [[TMP23:%.*]] = xor i1 [[TMP22]], true -; CHECK-IN-LOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-IN-LOOP-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-IN-LOOP: middle.block: -; CHECK-IN-LOOP-NEXT: br label [[FOR_END:%.*]] -; CHECK-IN-LOOP: scalar.ph: -; CHECK-IN-LOOP-NEXT: br label [[FOR_BODY:%.*]] -; CHECK-IN-LOOP: for.body: -; CHECK-IN-LOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-IN-LOOP-NEXT: [[RDX:%.*]] = phi i32 [ 7, [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] -; CHECK-IN-LOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[IV]] -; CHECK-IN-LOOP-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-IN-LOOP-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP24]], 5 -; CHECK-IN-LOOP-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; CHECK-IN-LOOP: if.then: -; CHECK-IN-LOOP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-IN-LOOP-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-IN-LOOP-NEXT: [[XOR:%.*]] = xor i32 [[RDX]], [[TMP25]] -; CHECK-IN-LOOP-NEXT: br label [[FOR_INC]] -; CHECK-IN-LOOP: for.inc: -; CHECK-IN-LOOP-NEXT: [[RES]] = phi i32 [ [[RDX]], [[FOR_BODY]] ], [ [[XOR]], [[IF_THEN]] ] -; CHECK-IN-LOOP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-IN-LOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-IN-LOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-IN-LOOP-NEXT: br label [[FOR_INC:%.*]] ; CHECK-IN-LOOP: for.end: -; CHECK-IN-LOOP-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] -; CHECK-IN-LOOP-NEXT: ret i32 [[RES_LCSSA]] +; CHECK-IN-LOOP-NEXT: ret i32 [[TMP19]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index 1879386..5531b3c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -72,7 +72,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body @@ -176,10 +177,11 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT16]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[TMP93]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP66:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP67:%.*]] = xor i1 [[TMP66]], true -; CHECK-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index ec17872..9ebe790 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -33,7 +33,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body @@ -73,10 +74,11 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX1]], i64 [[TMP2]]) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = xor i1 [[TMP6]], true -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body @@ -120,10 +122,11 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP12]], true -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body @@ -180,10 +183,11 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP22:%.*]] = xor i1 [[TMP21]], true ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body @@ -231,10 +235,11 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body @@ -284,10 +289,11 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = xor i1 [[TMP14]], true -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: +; CHECK: for.end: +; CHECK-NEXT: ret void ; entry: @@ -342,10 +348,11 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: +; CHECK: for.end: +; CHECK-NEXT: ret void ; entry: @@ -403,10 +410,11 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP13]], true -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: +; CHECK: for.end: +; CHECK-NEXT: ret void ; entry: @@ -454,10 +462,11 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body @@ -509,10 +518,11 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = xor i1 [[TMP14]], true -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void ; entry: br label %while.body @@ -551,7 +561,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: store <vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index e7d25a0..742097b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -562,7 +562,8 @@ define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indic ; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: for.exit: +; CHECK-NEXT: ret void ; entry: br label %for.body @@ -626,7 +627,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -719,7 +720,7 @@ define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> [[TMP6]], i64 1, <vscale x 2 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll index e6ff39b..6da3c77c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -317,19 +317,7 @@ define void @test_v4_v4m(ptr noalias %a, ptr readonly %b) #3 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 -; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; @@ -369,19 +357,7 @@ define void @test_v2_v4m(ptr noalias %a, ptr readonly %b) #3 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 -; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; @@ -421,19 +397,7 @@ define void @test_v2_v4(ptr noalias %a, ptr readonly %b) #3 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8 -; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index c44db7d..1607755 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -71,16 +71,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA: middle.block: -; DATA-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; DATA: scalar.ph: ; DATA-NEXT: br label [[WHILE_BODY:%.*]] -; DATA: while.body: -; DATA-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; DATA-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; DATA-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; DATA-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; DATA-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; DATA-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; DATA: while.end.loopexit: ; DATA-NEXT: ret void ; @@ -115,16 +106,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]] ; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_NO_LANEMASK: middle.block: -; DATA_NO_LANEMASK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; DATA_NO_LANEMASK: scalar.ph: ; DATA_NO_LANEMASK-NEXT: br label [[WHILE_BODY:%.*]] -; DATA_NO_LANEMASK: while.body: -; DATA_NO_LANEMASK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; DATA_NO_LANEMASK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; DATA_NO_LANEMASK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; DATA_NO_LANEMASK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; DATA_NO_LANEMASK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; DATA_NO_LANEMASK: while.end.loopexit: ; DATA_NO_LANEMASK-NEXT: ret void ; @@ -150,16 +132,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true ; DATA_AND_CONTROL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL: middle.block: -; DATA_AND_CONTROL-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; DATA_AND_CONTROL: scalar.ph: ; DATA_AND_CONTROL-NEXT: br label [[WHILE_BODY:%.*]] -; DATA_AND_CONTROL: while.body: -; DATA_AND_CONTROL-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; DATA_AND_CONTROL-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; DATA_AND_CONTROL-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; DATA_AND_CONTROL-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; DATA_AND_CONTROL-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; DATA_AND_CONTROL-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; DATA_AND_CONTROL: while.end.loopexit: ; DATA_AND_CONTROL-NEXT: ret void ; @@ -190,16 +163,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP15]], true ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DATA_AND_CONTROL_NO_RT_CHECK: middle.block: -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]] -; DATA_AND_CONTROL_NO_RT_CHECK: scalar.ph: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; DATA_AND_CONTROL_NO_RT_CHECK: while.body: -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; DATA_AND_CONTROL_NO_RT_CHECK: while.end.loopexit: ; DATA_AND_CONTROL_NO_RT_CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll index 038330b..c261760 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll @@ -22,21 +22,6 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF2: [[MIDDLE_BLOCK]]: ; VF2-NEXT: br label %[[EXIT:.*]] -; VF2: [[SCALAR_PH:.*]]: -; VF2-NEXT: br label %[[LOOP:.*]] -; VF2: [[LOOP]]: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF2-NEXT: [[MUL_2:%.*]] = shl nsw i64 [[IV]], 1 -; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[MUL_2]] -; VF2-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8 -; VF2-NEXT: store i64 [[L_0]], ptr [[DATA_0]], align 8 -; VF2-NEXT: [[ADD_1:%.*]] = or disjoint i64 [[MUL_2]], 1 -; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[ADD_1]] -; VF2-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8 -; VF2-NEXT: store i64 [[L_1]], ptr [[DATA_1]], align 8 -; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 2 -; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; VF2: [[EXIT]]: ; VF2-NEXT: ret void ; @@ -86,33 +71,18 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4-NEXT: br i1 false, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] ; VF4: [[PRED_STORE_IF5]]: ; VF4-NEXT: [[TMP27:%.*]] = shl nsw i64 3, 1 -; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP27]] -; VF4-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP28]], align 8 -; VF4-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8 -; VF4-NEXT: [[TMP30:%.*]] = or disjoint i64 [[TMP27]], 1 -; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP30]] -; VF4-NEXT: [[TMP32:%.*]] = load i64, ptr [[TMP31]], align 8 -; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 -; VF4-NEXT: br label %[[PRED_STORE_CONTINUE6]] -; VF4: [[PRED_STORE_CONTINUE6]]: -; VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]] -; VF4: [[MIDDLE_BLOCK]]: -; VF4-NEXT: br label %[[EXIT:.*]] -; VF4: [[SCALAR_PH:.*]]: -; VF4-NEXT: br label %[[LOOP:.*]] -; VF4: [[LOOP]]: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF4-NEXT: [[MUL_2:%.*]] = shl nsw i64 [[IV]], 1 -; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[MUL_2]] +; VF4-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP27]] ; VF4-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8 ; VF4-NEXT: store i64 [[L_0]], ptr [[DATA_0]], align 8 -; VF4-NEXT: [[ADD_1:%.*]] = or disjoint i64 [[MUL_2]], 1 +; VF4-NEXT: [[ADD_1:%.*]] = or disjoint i64 [[TMP27]], 1 ; VF4-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[ADD_1]] ; VF4-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8 ; VF4-NEXT: store i64 [[L_1]], ptr [[DATA_1]], align 8 -; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 2 -; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] +; VF4-NEXT: br label %[[PRED_STORE_CONTINUE6]] +; VF4: [[PRED_STORE_CONTINUE6]]: +; VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; VF4: [[MIDDLE_BLOCK]]: +; VF4-NEXT: br label %[[EXIT:.*]] ; VF4: [[EXIT]]: ; VF4-NEXT: ret void ; @@ -237,27 +207,6 @@ define void @test_complex_add_float_tc_4(ptr %res, ptr noalias %A, ptr noalias % ; VF2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: [[MIDDLE_BLOCK]]: ; VF2-NEXT: br label %[[EXIT:.*]] -; VF2: [[SCALAR_PH:.*]]: -; VF2-NEXT: br label %[[LOOP:.*]] -; VF2: [[LOOP]]: -; VF2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[IV]] -; VF2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 [[IV]] -; VF2-NEXT: [[L_A_0:%.*]] = load float, ptr [[GEP_A_0]], align 4 -; VF2-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_A_0]], i64 4 -; VF2-NEXT: [[L_A_1:%.*]] = load float, ptr [[GEP_A_1]], align 4 -; VF2-NEXT: [[L_B_0:%.*]] = load float, ptr [[GEP_B_0]], align 4 -; VF2-NEXT: [[ADD_0:%.*]] = fadd float [[L_A_0]], [[L_B_0]] -; VF2-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_B_0]], i64 4 -; VF2-NEXT: [[L_B_1:%.*]] = load float, ptr [[GEP_B_1]], align 4 -; VF2-NEXT: [[ADD_1:%.*]] = fadd float [[L_A_1]], [[L_B_1]] -; VF2-NEXT: [[GEP_RES_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[RES]], i64 [[IV]] -; VF2-NEXT: store float [[ADD_0]], ptr [[GEP_RES_0]], align 4 -; VF2-NEXT: [[GEP_RES_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_RES_0]], i64 4 -; VF2-NEXT: store float [[ADD_1]], ptr [[GEP_RES_1]], align 4 -; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4 -; VF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; VF2: [[EXIT]]: ; VF2-NEXT: ret void ; @@ -282,27 +231,6 @@ define void @test_complex_add_float_tc_4(ptr %res, ptr noalias %A, ptr noalias % ; VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF4: [[MIDDLE_BLOCK]]: ; VF4-NEXT: br label %[[EXIT:.*]] -; VF4: [[SCALAR_PH:.*]]: -; VF4-NEXT: br label %[[LOOP:.*]] -; VF4: [[LOOP]]: -; VF4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF4-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[A]], i64 [[IV]] -; VF4-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[B]], i64 [[IV]] -; VF4-NEXT: [[L_A_0:%.*]] = load float, ptr [[GEP_A_0]], align 4 -; VF4-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_A_0]], i64 4 -; VF4-NEXT: [[L_A_1:%.*]] = load float, ptr [[GEP_A_1]], align 4 -; VF4-NEXT: [[L_B_0:%.*]] = load float, ptr [[GEP_B_0]], align 4 -; VF4-NEXT: [[ADD_0:%.*]] = fadd float [[L_A_0]], [[L_B_0]] -; VF4-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_B_0]], i64 4 -; VF4-NEXT: [[L_B_1:%.*]] = load float, ptr [[GEP_B_1]], align 4 -; VF4-NEXT: [[ADD_1:%.*]] = fadd float [[L_A_1]], [[L_B_1]] -; VF4-NEXT: [[GEP_RES_0:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[RES]], i64 [[IV]] -; VF4-NEXT: store float [[ADD_0]], ptr [[GEP_RES_0]], align 4 -; VF4-NEXT: [[GEP_RES_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_RES_0]], i64 4 -; VF4-NEXT: store float [[ADD_1]], ptr [[GEP_RES_1]], align 4 -; VF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; VF4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4 -; VF4-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; VF4: [[EXIT]]: ; VF4-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll index a044ae8..d290f2d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll @@ -27,21 +27,6 @@ define void @load_store_interleave_group(ptr noalias %data) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[MUL_2:%.*]] = shl nsw i64 [[IV]], 1 -; CHECK-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[MUL_2]] -; CHECK-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8 -; CHECK-NEXT: store i64 [[L_0]], ptr [[DATA_0]], align 8 -; CHECK-NEXT: [[ADD_1:%.*]] = or disjoint i64 [[MUL_2]], 1 -; CHECK-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[ADD_1]] -; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8 -; CHECK-NEXT: store i64 [[L_1]], ptr [[DATA_1]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -105,25 +90,6 @@ define void @test_2xi64_with_wide_load(ptr noalias %data, ptr noalias %factor) { ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV]] -; CHECK-NEXT: [[L_FACTOR:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = shl nsw i64 [[IV]], 1 -; CHECK-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP13]] -; CHECK-NEXT: [[L_0:%.*]] = load i64, ptr [[DATA_0]], align 8 -; CHECK-NEXT: [[MUL_0:%.*]] = mul i64 [[L_FACTOR]], [[L_0]] -; CHECK-NEXT: store i64 [[MUL_0]], ptr [[DATA_0]], align 8 -; CHECK-NEXT: [[TMP14:%.*]] = or disjoint i64 [[TMP13]], 1 -; CHECK-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP14]] -; CHECK-NEXT: [[L_1:%.*]] = load i64, ptr [[DATA_1]], align 8 -; CHECK-NEXT: [[MUL_1:%.*]] = mul i64 [[L_FACTOR]], [[L_1]] -; CHECK-NEXT: store i64 [[MUL_1]], ptr [[DATA_1]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll index edb9519..187edb5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -49,23 +49,6 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[IF_THEN1165_US:%.*]] -; CHECK: if.then1165.us: -; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV1783]] -; CHECK-NEXT: [[L_A:%.*]] = load i16, ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[CONV1177_US:%.*]] = zext i16 [[L_A]] to i32 -; CHECK-NEXT: [[ADD1178_US:%.*]] = add nsw i32 [[CONV1177_US]], 10 -; CHECK-NEXT: [[CONV1179_US:%.*]] = trunc i32 [[ADD1178_US]] to i16 -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV1783]] -; CHECK-NEXT: [[L_B:%.*]] = load i64, ptr [[GEP_B]], align 8 -; CHECK-NEXT: [[IDXPROM1181_US:%.*]] = ashr exact i64 [[L_B]], 32 -; CHECK-NEXT: [[ARRAYIDX1185_US:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[IDXPROM1181_US]] -; CHECK-NEXT: store i16 [[CONV1179_US]], ptr [[ARRAYIDX1185_US]], align 2 -; CHECK-NEXT: [[INDVARS_IV_NEXT1784]] = add nuw nsw i64 [[INDVARS_IV1783]], 1 -; CHECK-NEXT: [[EXITCOND1785:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT1784]], 16 -; CHECK-NEXT: br i1 [[EXITCOND1785]], label [[FOR_INC1286_LOOPEXIT]], label [[IF_THEN1165_US]] ; CHECK: for.inc1286.loopexit: ; CHECK-NEXT: ret void ; @@ -141,24 +124,6 @@ define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_INC1286_LOOPEXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[IF_THEN1165_US:%.*]] -; CHECK: if.then1165.us: -; CHECK-NEXT: [[INDVARS_IV1783:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT1784:%.*]], [[IF_THEN1165_US]] ] -; CHECK-NEXT: [[FPTR:%.*]] = load i32, ptr [[C]], align 4 -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDVARS_IV1783]] -; CHECK-NEXT: [[L_A:%.*]] = load i16, ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[CONV1177_US:%.*]] = zext i16 [[L_A]] to i32 -; CHECK-NEXT: [[ADD1178_US:%.*]] = add nsw i32 [[CONV1177_US]], [[FPTR]] -; CHECK-NEXT: [[CONV1179_US:%.*]] = trunc i32 [[ADD1178_US]] to i16 -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV1783]] -; CHECK-NEXT: [[L_B:%.*]] = load i64, ptr [[GEP_B]], align 8 -; CHECK-NEXT: [[IDXPROM1181_US:%.*]] = ashr exact i64 [[L_B]], 32 -; CHECK-NEXT: [[ARRAYIDX1185_US:%.*]] = getelementptr inbounds i16, ptr [[M3]], i64 [[IDXPROM1181_US]] -; CHECK-NEXT: store i16 [[CONV1179_US]], ptr [[ARRAYIDX1185_US]], align 2 -; CHECK-NEXT: [[INDVARS_IV_NEXT1784]] = add nuw nsw i64 [[INDVARS_IV1783]], 1 -; CHECK-NEXT: [[EXITCOND1785:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT1784]], 16 -; CHECK-NEXT: br i1 [[EXITCOND1785]], label [[FOR_INC1286_LOOPEXIT]], label [[IF_THEN1165_US]] ; CHECK: for.inc1286.loopexit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll index c8eecd7..96a25a8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -127,7 +127,8 @@ define void @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll index d5d0c14..bc9cf4f 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll @@ -23,11 +23,7 @@ define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) { ; GFX9-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; GFX9-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; GFX9: middle.block: -; GFX9-NEXT: br label [[FOR_END:%.*]] -; GFX9: scalar.ph: ; GFX9-NEXT: br label [[FOR_BODY:%.*]] -; GFX9: for.body: -; GFX9-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; GFX9: for.end: ; GFX9-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x half> [[TMP3]], [[TMP2]] ; GFX9-NEXT: [[ADD_LCSSA:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[BIN_RDX]]) @@ -52,11 +48,7 @@ define half @vectorize_v2f16_loop(ptr addrspace(1) noalias %s) { ; VI-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; VI-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VI: middle.block: -; VI-NEXT: br label [[FOR_END:%.*]] -; VI: scalar.ph: ; VI-NEXT: br label [[FOR_BODY:%.*]] -; VI: for.body: -; VI-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; VI: for.end: ; VI-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x half> [[TMP3]], [[TMP2]] ; VI-NEXT: [[ADD_LCSSA:%.*]] = call fast half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> [[BIN_RDX]]) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll index e83ac2e..58a24ee 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll @@ -36,18 +36,6 @@ define void @f0(ptr noalias %dst, ptr readonly %src, i64 %n) #0 { ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP10]], 3 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 [[MUL]], ptr [[ARRAYIDX3]], align 1 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[FOR_END_LOOPEXIT]]: ; CHECK-NEXT: br label %[[FOR_END]] ; CHECK: [[FOR_END]]: @@ -81,7 +69,4 @@ attributes #0 = { nofree norecurse nounwind "target-features"="+armv8.1-m.main,+ ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]], [[META5:![0-9]+]]} -; CHECK: [[META4]] = !{!"llvm.loop.vectorize.width", i32 16} -; CHECK: [[META5]] = !{!"llvm.loop.interleave.count", i32 2} ;. diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll index e52d85c..9a76019 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll @@ -25,21 +25,7 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 1 -; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i32 [[ADD5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP8]] -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_023]] -; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; @@ -212,21 +198,7 @@ define void @test_stride3_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 3 -; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i32 [[ADD5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP8]] -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_023]] -; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; @@ -273,21 +245,7 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_023:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[I_023]], 4 -; CHECK-NEXT: [[ADD5:%.*]] = add nuw nsw i32 [[MUL]], 2 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i32 [[ADD5]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 5, [[TMP8]] -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_023]] -; CHECK-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_023]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[END]], label [[FOR_BODY]] ; CHECK: end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll index 4cdfcf2..0a4ed7f 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -22,11 +22,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -75,11 +71,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP8]] ; @@ -126,11 +118,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] @@ -177,11 +165,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -228,11 +212,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -279,11 +259,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -330,11 +306,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP4]]) ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] @@ -381,11 +353,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP4]]) ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index fc79227..029d8bd 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -34,28 +34,11 @@ define i32 @mla_i32(ptr noalias nocapture readonly %A, ptr noalias nocapture rea ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[RES_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011]] -; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP12]] to i32 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011]] -; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CONV2:%.*]] = sext i8 [[TMP13]] to i32 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] -; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[RES_010]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[RES_0_LCSSA]] ; entry: @@ -112,28 +95,11 @@ define i32 @mla_i8(ptr noalias nocapture readonly %A, ptr noalias nocapture read ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[RES_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011]] -; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP12]] to i32 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011]] -; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CONV2:%.*]] = sext i8 [[TMP13]] to i32 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] -; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[RES_010]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[RES_0_LCSSA]] ; entry: @@ -183,25 +149,13 @@ define i32 @add_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = add i32 [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -245,26 +199,14 @@ define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 1, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = mul nsw i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -308,26 +250,14 @@ define i32 @and_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ -1, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = and i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -371,26 +301,14 @@ define i32 @or_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = or i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -434,26 +352,14 @@ define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = xor i32 [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -497,26 +403,14 @@ define float @fadd_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = fadd fast float [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret float [[R_0_LCSSA]] ; entry: @@ -560,26 +454,14 @@ define float @fmul_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP4]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[R_07:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = fmul fast float [[TMP7]], [[R_07]] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY:%.*]] ], [ [[TMP6]], [[FOR_BODY]] ] ; CHECK-NEXT: ret float [[R_0_LCSSA]] ; entry: @@ -622,7 +504,7 @@ define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -640,7 +522,7 @@ define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP7]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -689,7 +571,7 @@ define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -707,7 +589,7 @@ define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP7]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -756,7 +638,7 @@ define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -774,7 +656,7 @@ define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP7]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -823,7 +705,7 @@ define i32 @umax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -841,7 +723,7 @@ define i32 @umax_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP7]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index 3426fb1..6ea075f 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -30,17 +30,6 @@ define void @always_vectorize(ptr %p, i32 %x) { ; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] -; DEFAULT-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -59,17 +48,6 @@ define void @always_vectorize(ptr %p, i32 %x) { ; OPTSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] -; OPTSIZE-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -88,17 +66,6 @@ define void @always_vectorize(ptr %p, i32 %x) { ; MINSIZE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[X]] -; MINSIZE-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; @@ -386,23 +353,6 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[TMP72:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i8 -; DEFAULT-NEXT: [[MUL:%.*]] = mul i8 [[A]], [[TMP72]] -; DEFAULT-NEXT: [[SHR:%.*]] = lshr i8 [[TMP72]], 1 -; DEFAULT-NEXT: [[MUL5:%.*]] = mul i8 [[SHR]], [[B]] -; DEFAULT-NEXT: [[ADD:%.*]] = add i8 [[MUL5]], [[MUL]] -; DEFAULT-NEXT: [[SHR7:%.*]] = lshr i8 [[TMP72]], 2 -; DEFAULT-NEXT: [[MUL9:%.*]] = mul i8 [[SHR7]], [[C]] -; DEFAULT-NEXT: [[ADD10:%.*]] = add i8 [[ADD]], [[MUL9]] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]] -; DEFAULT-NEXT: store i8 [[ADD10]], ptr [[ARRAYIDX]], align 1 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -502,23 +452,6 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; DEFAULT-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; DEFAULT-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; DEFAULT-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -545,23 +478,6 @@ define void @dont_vectorize_with_minsize() { ; OPTSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; OPTSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; OPTSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; OPTSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; OPTSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -588,23 +504,6 @@ define void @dont_vectorize_with_minsize() { ; MINSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; MINSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; MINSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; MINSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; MINSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; @@ -659,23 +558,6 @@ define void @vectorization_forced() { ; DEFAULT-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; DEFAULT: [[SCALAR_PH:.*]]: -; DEFAULT-NEXT: br label %[[FOR_BODY:.*]] -; DEFAULT: [[FOR_BODY]]: -; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; DEFAULT-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; DEFAULT-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; DEFAULT-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; DEFAULT-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; DEFAULT: [[FOR_COND_CLEANUP]]: ; DEFAULT-NEXT: ret void ; @@ -702,23 +584,6 @@ define void @vectorization_forced() { ; OPTSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; OPTSIZE: [[MIDDLE_BLOCK]]: ; OPTSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; OPTSIZE: [[SCALAR_PH:.*]]: -; OPTSIZE-NEXT: br label %[[FOR_BODY:.*]] -; OPTSIZE: [[FOR_BODY]]: -; OPTSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; OPTSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OPTSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; OPTSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; OPTSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; OPTSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; OPTSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; OPTSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; OPTSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OPTSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; OPTSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; OPTSIZE: [[FOR_COND_CLEANUP]]: ; OPTSIZE-NEXT: ret void ; @@ -745,23 +610,6 @@ define void @vectorization_forced() { ; MINSIZE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; MINSIZE: [[MIDDLE_BLOCK]]: ; MINSIZE-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MINSIZE: [[SCALAR_PH:.*]]: -; MINSIZE-NEXT: br label %[[FOR_BODY:.*]] -; MINSIZE: [[FOR_BODY]]: -; MINSIZE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MINSIZE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[BVAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; MINSIZE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[CVAL:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; MINSIZE-NEXT: [[MUL:%.*]] = mul nsw i32 [[BVAL]], [[CVAL]] -; MINSIZE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDVARS_IV]] -; MINSIZE-NEXT: [[AVAL:%.*]] = load i16, ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[TRUNC:%.*]] = trunc i32 [[MUL]] to i16 -; MINSIZE-NEXT: [[ADD:%.*]] = add i16 [[TRUNC]], [[AVAL]] -; MINSIZE-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX4]], align 2 -; MINSIZE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; MINSIZE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; MINSIZE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; MINSIZE: [[FOR_COND_CLEANUP]]: ; MINSIZE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll index 625f7a6..1ae71c8 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll @@ -52,7 +52,7 @@ define dso_local void @predicate_loop_hint(ptr noalias nocapture %A, ptr noalias ; CHECK: %index.next = add nuw i64 %index, 4 ; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body, !llvm.loop [[VEC_LOOP2:![0-9]+]] ; -; CHECK: br i1 %{{.*}}, label %{{.*}}, label %for.body, !llvm.loop [[SCALAR_LOOP2:![0-9]+]] +; CHECK-NOT: br i1 %{{.*}}, label %{{.*}}, label %for.body, !llvm.loop entry: br label %for.body @@ -78,9 +78,6 @@ for.body: ; CHECK-NEXT: [[MD_RT_UNROLL_DIS]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK-NEXT: [[SCALAR_LOOP1]] = distinct !{[[SCALAR_LOOP1]], [[MD_RT_UNROLL_DIS]], [[MD_IS_VEC]]} ; CHECK-NEXT: [[VEC_LOOP2]] = distinct !{[[VEC_LOOP2]], [[MD_IS_VEC]], [[MD_RT_UNROLL_DIS]]} -; CHECK-NEXT: [[SCALAR_LOOP2]] = distinct !{[[SCALAR_LOOP2]], [[ORIG_PRED_ENABLED:!.+]], [[ORIG_VEC_ENABLED:!.+]]} -; CHECK-NEXT: [[ORIG_PRED_ENABLED]] = !{!"llvm.loop.vectorize.predicate.enable", i1 true} -; CHECK-NEXT: [[ORIG_VEC_ENABLED]] = !{!"llvm.loop.vectorize.enable", i1 true} !6 = distinct !{!6, !7, !8} !7 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll index 0b13343..7afa8ce9 100644 --- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll @@ -33,18 +33,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] -; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll index a7f0206..024194d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll @@ -46,19 +46,6 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { ; ZVFBFMIN-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; ZVFBFMIN: [[MIDDLE_BLOCK]]: ; ZVFBFMIN-NEXT: br label %[[EXIT:.*]] -; ZVFBFMIN: [[SCALAR_PH:.*]]: -; ZVFBFMIN-NEXT: br label %[[LOOP:.*]] -; ZVFBFMIN: [[LOOP]]: -; ZVFBFMIN-NEXT: [[I:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] -; ZVFBFMIN-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]] -; ZVFBFMIN-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]] -; ZVFBFMIN-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2 -; ZVFBFMIN-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2 -; ZVFBFMIN-NEXT: [[Z:%.*]] = fadd bfloat [[X]], [[Y]] -; ZVFBFMIN-NEXT: store bfloat [[Z]], ptr [[A_GEP]], align 2 -; ZVFBFMIN-NEXT: [[I_NEXT]] = add i64 [[I]], 1 -; ZVFBFMIN-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; ZVFBFMIN-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; ZVFBFMIN: [[EXIT]]: ; ZVFBFMIN-NEXT: ret void ; @@ -155,23 +142,6 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 ; ZVFBFMIN-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; ZVFBFMIN: [[MIDDLE_BLOCK]]: ; ZVFBFMIN-NEXT: br label %[[EXIT:.*]] -; ZVFBFMIN: [[SCALAR_PH:.*]]: -; ZVFBFMIN-NEXT: br label %[[LOOP:.*]] -; ZVFBFMIN: [[LOOP]]: -; ZVFBFMIN-NEXT: [[I:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] -; ZVFBFMIN-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]] -; ZVFBFMIN-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]] -; ZVFBFMIN-NEXT: [[C_GEP:%.*]] = getelementptr float, ptr [[C]], i64 [[I]] -; ZVFBFMIN-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2 -; ZVFBFMIN-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2 -; ZVFBFMIN-NEXT: [[Z:%.*]] = load float, ptr [[C_GEP]], align 4 -; ZVFBFMIN-NEXT: [[X_EXT:%.*]] = fpext bfloat [[X]] to float -; ZVFBFMIN-NEXT: [[Y_EXT:%.*]] = fpext bfloat [[Y]] to float -; ZVFBFMIN-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X_EXT]], float [[Y_EXT]], float [[Z]]) -; ZVFBFMIN-NEXT: store float [[FMULADD]], ptr [[C_GEP]], align 4 -; ZVFBFMIN-NEXT: [[I_NEXT]] = add i64 [[I]], 1 -; ZVFBFMIN-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; ZVFBFMIN-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; ZVFBFMIN: [[EXIT]]: ; ZVFBFMIN-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 612e7c0..2087218 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -33,24 +33,6 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ 1, %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[XOR]] = xor i16 0, 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[DEAD_GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 -; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[TMP25]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -106,24 +88,6 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[XOR]] = xor i16 0, 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[DEAD_GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -179,27 +143,6 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[XOR]] = xor i16 0, 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[DEAD_GEP_1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[DEAD_GEP_2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -262,29 +205,6 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ 1, %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[XOR]] = xor i16 0, 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: br label %[[THEN_1:.*]] -; CHECK: [[THEN_1]]: -; CHECK-NEXT: [[DEAD_GEP_1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[DEAD_GEP_2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -347,31 +267,6 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ 1, %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[XOR]] = xor i16 0, 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: br label %[[THEN_1:.*]] -; CHECK: [[THEN_1]]: -; CHECK-NEXT: [[DEAD_GEP_1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[ELSE_2:.*]] -; CHECK: [[ELSE_2]]: -; CHECK-NEXT: [[DEAD_GEP_2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -450,31 +345,6 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[XOR1315:%.*]] = phi i16 [ 1, %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[XOR]] = xor i16 0, 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: br i1 [[IC]], label %[[THEN_1:.*]], label %[[ELSE]] -; CHECK: [[THEN_1]]: -; CHECK-NEXT: [[DEAD_GEP_1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[ELSE_2:.*]] -; CHECK: [[ELSE_2]]: -; CHECK-NEXT: [[DEAD_GEP_2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: store i16 [[XOR]], ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 3 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -537,24 +407,6 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[XOR1315:%.*]] = phi i32 [ 1, %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[XOR]] = xor i32 0, 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[P:%.*]] = phi i16 [ [[L]], %[[LOOP_HEADER]] ], [ 99, %[[THEN]] ] -; CHECK-NEXT: store i16 [[P]], ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -607,24 +459,6 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 { ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[XOR1315:%.*]] = phi i32 [ 1, %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[XOR]] = xor i32 0, 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; CHECK-NEXT: [[C:%.*]] = icmp eq i16 [[L]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[P:%.*]] = phi i16 [ [[L]], %[[LOOP_HEADER]] ], [ 99, %[[ELSE]] ] -; CHECK-NEXT: store i16 [[P]], ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[TMP18]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 96c3a0d..10f8f74 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -42,16 +42,6 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[START_EXT]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2 -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 3 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV]], 111 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -326,21 +316,6 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 false, label %[[LOOP_LATCH]], label %[[THEN:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[NOT_A:%.*]] = xor i32 [[A]], -1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[NOT_A]], %[[THEN]] ], [ 0, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i32 [[P]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 9 -; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], 322 -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -408,21 +383,6 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L_DEAD:%.*]] = load i8, ptr [[GEP_SRC_0]], align 1 -; CHECK-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_1]] -; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1 -; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[L_1]] to i32 -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i32 [[EXT]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 -; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll index b6230dc..3fd90b2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll @@ -32,18 +32,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] -; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -86,21 +75,9 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) { ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP9]]) -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]] -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i64 [[TMP11]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index d20dd05..01b4502 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -29,18 +29,7 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], [[V]] -; CHECK-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -61,18 +50,7 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[FOR_END:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[FOR_BODY:%.*]] -; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], [[V]] -; FIXED-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -113,20 +91,9 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], [[V]] -; CHECK-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -147,18 +114,7 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[FOR_END:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[FOR_BODY:%.*]] -; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], [[V]] -; FIXED-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -199,20 +155,9 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[DIVREM:%.*]] = urem i64 [[ELEM]], [[V]] -; CHECK-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -233,18 +178,7 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[FOR_END:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[FOR_BODY:%.*]] -; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[DIVREM:%.*]] = urem i64 [[ELEM]], [[V]] -; FIXED-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -285,20 +219,9 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[DIVREM:%.*]] = srem i64 [[ELEM]], [[V]] -; CHECK-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -319,18 +242,7 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[FOR_END:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[FOR_BODY:%.*]] -; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[DIVREM:%.*]] = srem i64 [[ELEM]], [[V]] -; FIXED-NEXT: store i64 [[DIVREM]], ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -379,26 +291,9 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0 -; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; CHECK: do_op: -; CHECK-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], [[V]] -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; CHECK-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -422,24 +317,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[FOR_END:%.*]] -; FIXED: scalar.ph: -; FIXED-NEXT: br label [[FOR_BODY:%.*]] -; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0 -; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; FIXED: do_op: -; FIXED-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], [[V]] -; FIXED-NEXT: br label [[LATCH]] -; FIXED: latch: -; FIXED-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; FIXED-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; FIXED-NEXT: br label [[LATCH:%.*]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -494,26 +372,9 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0 -; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; CHECK: do_op: -; CHECK-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], [[V]] -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; CHECK-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -537,24 +398,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[FOR_END:%.*]] -; FIXED: scalar.ph: -; FIXED-NEXT: br label [[FOR_BODY:%.*]] -; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[V]], 0 -; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; FIXED: do_op: -; FIXED-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], [[V]] -; FIXED-NEXT: br label [[LATCH]] -; FIXED: latch: -; FIXED-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; FIXED-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; FIXED-NEXT: br label [[LATCH:%.*]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -601,26 +445,9 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP12]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42 -; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; CHECK: do_op: -; CHECK-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], 27 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; CHECK-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -641,24 +468,7 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[FOR_END:%.*]] -; FIXED: scalar.ph: -; FIXED-NEXT: br label [[FOR_BODY:%.*]] -; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42 -; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; FIXED: do_op: -; FIXED-NEXT: [[DIVREM:%.*]] = udiv i64 [[ELEM]], 27 -; FIXED-NEXT: br label [[LATCH]] -; FIXED: latch: -; FIXED-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; FIXED-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; FIXED-NEXT: br label [[LATCH:%.*]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -705,26 +515,9 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP12]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42 -; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; CHECK: do_op: -; CHECK-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], 27 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; CHECK-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -745,24 +538,7 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[FOR_END:%.*]] -; FIXED: scalar.ph: -; FIXED-NEXT: br label [[FOR_BODY:%.*]] -; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; FIXED-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[C:%.*]] = icmp ne i64 [[ELEM]], 42 -; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; FIXED: do_op: -; FIXED-NEXT: [[DIVREM:%.*]] = sdiv i64 [[ELEM]], 27 -; FIXED-NEXT: br label [[LATCH]] -; FIXED: latch: -; FIXED-NEXT: [[PHI:%.*]] = phi i64 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; FIXED-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; FIXED-NEXT: br label [[LATCH:%.*]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -815,26 +591,9 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[ELEM]], -128 -; CHECK-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; CHECK: do_op: -; CHECK-NEXT: [[DIVREM:%.*]] = sdiv i8 [[ELEM]], -1 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; CHECK-NEXT: store i8 [[PHI]], ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -856,24 +615,7 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[FOR_END:%.*]] -; FIXED: scalar.ph: -; FIXED-NEXT: br label [[FOR_BODY:%.*]] -; FIXED: for.body: -; FIXED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; FIXED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] -; FIXED-NEXT: [[ELEM:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; FIXED-NEXT: [[C:%.*]] = icmp ne i8 [[ELEM]], -128 -; FIXED-NEXT: br i1 [[C]], label [[DO_OP:%.*]], label [[LATCH]] -; FIXED: do_op: -; FIXED-NEXT: [[DIVREM:%.*]] = sdiv i8 [[ELEM]], -1 -; FIXED-NEXT: br label [[LATCH]] -; FIXED: latch: -; FIXED-NEXT: [[PHI:%.*]] = phi i8 [ [[ELEM]], [[FOR_BODY]] ], [ [[DIVREM]], [[DO_OP]] ] -; FIXED-NEXT: store i8 [[PHI]], ptr [[ARRAYIDX]], align 1 -; FIXED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; FIXED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; FIXED-NEXT: br label [[LATCH:%.*]] ; FIXED: for.end: ; FIXED-NEXT: ret void ; @@ -945,7 +687,7 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i8> [[VEC_IND]], [[BROADCAST_SPLAT7]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP16]], 2 @@ -972,7 +714,7 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; CHECK-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 0 ; CHECK-NEXT: [[IV_NEXT_TRUNC]] = trunc i16 [[IV_NEXT]] to i8 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[MERGE_LCSSA:%.*]] = phi i32 [ [[MERGE]], [[LOOP_LATCH]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[MERGE_LCSSA]] @@ -1004,28 +746,9 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; FIXED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; FIXED: middle.block: ; FIXED-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3 -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: -; FIXED-NEXT: br label [[LOOP_HEADER:%.*]] -; FIXED: loop.header: -; FIXED-NEXT: [[IV:%.*]] = phi i16 [ -12, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; FIXED-NEXT: [[NARROW_IV:%.*]] = phi i8 [ -12, [[SCALAR_PH]] ], [ [[IV_NEXT_TRUNC:%.*]], [[LOOP_LATCH]] ] -; FIXED-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]] -; FIXED: then: -; FIXED-NEXT: [[UD:%.*]] = udiv i8 [[NARROW_IV]], [[X]] -; FIXED-NEXT: [[UD_EXT:%.*]] = zext i8 [[UD]] to i16 -; FIXED-NEXT: [[SD:%.*]] = sdiv i16 [[UD_EXT]], [[Y]] -; FIXED-NEXT: [[SD_EXT:%.*]] = sext i16 [[SD]] to i32 -; FIXED-NEXT: br label [[LOOP_LATCH]] -; FIXED: loop.latch: -; FIXED-NEXT: [[MERGE:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[SD_EXT]], [[THEN]] ] -; FIXED-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], 1 -; FIXED-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 0 -; FIXED-NEXT: [[IV_NEXT_TRUNC]] = trunc i16 [[IV_NEXT]] to i8 -; FIXED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]] +; FIXED-NEXT: br label [[LOOP_LATCH:%.*]] ; FIXED: exit: -; FIXED-NEXT: [[MERGE_LCSSA:%.*]] = phi i32 [ [[MERGE]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; FIXED-NEXT: ret i32 [[MERGE_LCSSA]] +; FIXED-NEXT: ret i32 [[TMP7]] ; entry: br label %loop.header diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll index 0a60556..21272cb 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll @@ -30,16 +30,7 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY1:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV1]] -; CHECK-NEXT: store i64 [[IV1]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; @@ -84,18 +75,7 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[B]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[ADDR]], i64 8 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store ptr [[ADDR]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll index a2ab7c4..143a51d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll @@ -46,19 +46,6 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { ; ZVFHMIN-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; ZVFHMIN: [[MIDDLE_BLOCK]]: ; ZVFHMIN-NEXT: br label %[[EXIT:.*]] -; ZVFHMIN: [[SCALAR_PH:.*]]: -; ZVFHMIN-NEXT: br label %[[LOOP:.*]] -; ZVFHMIN: [[LOOP]]: -; ZVFHMIN-NEXT: [[I:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] -; ZVFHMIN-NEXT: [[A_GEP:%.*]] = getelementptr half, ptr [[A]], i64 [[I]] -; ZVFHMIN-NEXT: [[B_GEP:%.*]] = getelementptr half, ptr [[B]], i64 [[I]] -; ZVFHMIN-NEXT: [[X:%.*]] = load half, ptr [[A_GEP]], align 2 -; ZVFHMIN-NEXT: [[Y:%.*]] = load half, ptr [[B_GEP]], align 2 -; ZVFHMIN-NEXT: [[Z:%.*]] = fadd half [[X]], [[Y]] -; ZVFHMIN-NEXT: store half [[Z]], ptr [[A_GEP]], align 2 -; ZVFHMIN-NEXT: [[I_NEXT]] = add i64 [[I]], 1 -; ZVFHMIN-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; ZVFHMIN-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; ZVFHMIN: [[EXIT]]: ; ZVFHMIN-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index 5df4f70..1c6954c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ -116,17 +116,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RVA23-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; RVA23: middle.block: -; RVA23-NEXT: br label [[EXIT:%.*]] -; RVA23: scalar.ph: ; RVA23-NEXT: br label [[LOOP:%.*]] -; RVA23: loop: -; RVA23-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; RVA23-NEXT: [[TMP8:%.*]] = mul i64 [[IV]], 7 -; RVA23-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] -; RVA23-NEXT: store i8 0, ptr [[ADD_PTR]], align 1 -; RVA23-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; RVA23-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 585 -; RVA23-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]] ; RVA23: exit: ; RVA23-NEXT: ret void ; @@ -153,17 +143,7 @@ define void @predicated_strided_store(ptr %start) { ; RVA23ZVL1024B-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RVA23ZVL1024B-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; RVA23ZVL1024B: middle.block: -; RVA23ZVL1024B-NEXT: br label [[EXIT:%.*]] -; RVA23ZVL1024B: scalar.ph: ; RVA23ZVL1024B-NEXT: br label [[LOOP:%.*]] -; RVA23ZVL1024B: loop: -; RVA23ZVL1024B-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; RVA23ZVL1024B-NEXT: [[TMP8:%.*]] = mul i64 [[IV]], 7 -; RVA23ZVL1024B-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] -; RVA23ZVL1024B-NEXT: store i8 0, ptr [[ADD_PTR]], align 1 -; RVA23ZVL1024B-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; RVA23ZVL1024B-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 585 -; RVA23ZVL1024B-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]] ; RVA23ZVL1024B: exit: ; RVA23ZVL1024B-NEXT: ret void ; @@ -216,21 +196,7 @@ define void @store_to_addr_generated_from_invariant_addr(ptr noalias %p0, ptr no ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P1]], i64 [[IV]] -; CHECK-NEXT: store ptr [[P0]], ptr [[ARRAYIDX11]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[P2]], align 4 -; CHECK-NEXT: [[BITS_TO_GO:%.*]] = getelementptr i8, ptr [[P3]], i64 [[TMP10]] -; CHECK-NEXT: store i32 0, ptr [[BITS_TO_GO]], align 4 -; CHECK-NEXT: store i32 0, ptr [[BITS_TO_GO]], align 4 -; CHECK-NEXT: store i8 0, ptr [[BITS_TO_GO]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index 4d97a65..4ccec2c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -153,21 +153,6 @@ define void @test_3_inductions(ptr noalias %dst, ptr noalias %src, i64 %n) #1 { ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV_0:%.*]] = phi i32 [ 1, %[[SCALAR_PH]] ], [ [[IV_0_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_OR:%.*]] = or i32 [[IV_2]], [[IV_0]] -; CHECK-NEXT: [[IV_OR_EXT:%.*]] = sext i32 [[IV_OR]] to i64 -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_OR_EXT]] -; CHECK-NEXT: store ptr [[GEP_SRC]], ptr [[DST]], align 8 -; CHECK-NEXT: [[IV_0_NEXT]] = add i32 [[IV_0]], 2 -; CHECK-NEXT: [[IV_1_NEXT]] = add i64 [[IV_1]], 1 -; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 2 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_1]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -228,27 +213,6 @@ define void @redundant_iv_trunc_for_cse(ptr noalias %src, ptr noalias %dst, i64 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 -; CHECK-NEXT: [[C_0:%.*]] = icmp eq i32 [[L]], 0 -; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-NEXT: br i1 [[C_0]], label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[TRUNC_IV_2:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-NEXT: [[SHL_IV:%.*]] = shl i32 [[TRUNC_IV_2]], 16 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[SHL_IV]], %[[THEN]] ], [ [[TRUNC_IV]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[TRUNC_P:%.*]] = trunc i32 [[P]] to i8 -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i8 [[TRUNC_P]], ptr [[GEP_DST]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 63d1af38..7e6e45f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -133,24 +133,11 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP10]]) -; IF-EVL-OUTLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; IF-EVL-OUTLOOP: scalar.ph: ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-OUTLOOP: for.body: -; IF-EVL-OUTLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; IF-EVL-OUTLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]] -; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -; IF-EVL-OUTLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 -; IF-EVL-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] -; IF-EVL-OUTLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; IF-EVL-OUTLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; IF-EVL-OUTLOOP: for.cond.cleanup.loopexit: -; IF-EVL-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_COND_CLEANUP]] ; IF-EVL-OUTLOOP: for.cond.cleanup: -; IF-EVL-OUTLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; IF-EVL-OUTLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP12]], [[FOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: ret i32 [[R_0_LCSSA]] ; ; IF-EVL-INLOOP-LABEL: @add_i16_i32( @@ -176,24 +163,11 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; IF-EVL-INLOOP: scalar.ph: ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-INLOOP: for.body: -; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; IF-EVL-INLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]] -; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 -; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] -; IF-EVL-INLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 -; IF-EVL-INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] ; IF-EVL-INLOOP: for.cond.cleanup.loopexit: -; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP]] ; IF-EVL-INLOOP: for.cond.cleanup: -; IF-EVL-INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; IF-EVL-INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP11]], [[FOR_BODY]] ] ; IF-EVL-INLOOP-NEXT: ret i32 [[R_0_LCSSA]] ; entry: @@ -330,22 +304,9 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-OUTLOOP: scalar.ph: ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-OUTLOOP: for.body: -; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] -; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-OUTLOOP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP19]], [[RDX]] -; IF-EVL-OUTLOOP-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP19]], i32 [[RDX]] -; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; IF-EVL-OUTLOOP: for.end: -; IF-EVL-OUTLOOP-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] -; IF-EVL-OUTLOOP-NEXT: ret i32 [[SMIN_LCSSA]] +; IF-EVL-OUTLOOP-NEXT: ret i32 [[TMP18]] ; ; IF-EVL-INLOOP-LABEL: @smin( ; IF-EVL-INLOOP-NEXT: entry: @@ -367,22 +328,9 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-INLOOP: scalar.ph: ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-INLOOP: for.body: -; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] -; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-INLOOP-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP16]], [[RDX]] -; IF-EVL-INLOOP-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP16]], i32 [[RDX]] -; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; IF-EVL-INLOOP: for.end: -; IF-EVL-INLOOP-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; IF-EVL-INLOOP-NEXT: ret i32 [[SMIN_LCSSA]] +; IF-EVL-INLOOP-NEXT: ret i32 [[RDX_MINMAX]] ; ; IF-EVL-LABEL: @smin( ; IF-EVL-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 43560d2..31c8b74 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -31,24 +31,7 @@ define void @load_store_factor2_i32(ptr %p) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 -; CHECK-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 -; CHECK-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2 -; CHECK-NEXT: store i32 [[Y1]], ptr [[Q1]], align 4 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -73,24 +56,7 @@ define void @load_store_factor2_i32(ptr %p) { ; FIXED-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; FIXED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 -; FIXED-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 -; FIXED-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2 -; FIXED-NEXT: store i32 [[Y1]], ptr [[Q1]], align 4 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -121,24 +87,7 @@ define void @load_store_factor2_i32(ptr %p) { ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; SCALABLE-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 -; SCALABLE-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 -; SCALABLE-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2 -; SCALABLE-NEXT: store i32 [[Y1]], ptr [[Q1]], align 4 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -194,24 +143,7 @@ define void @load_store_factor2_i64(ptr %p) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -236,24 +168,7 @@ define void @load_store_factor2_i64(ptr %p) { ; FIXED-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -284,24 +199,7 @@ define void @load_store_factor2_i64(ptr %p) { ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -359,29 +257,7 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 -; CHECK-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 -; CHECK-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2 -; CHECK-NEXT: store i32 [[Y1]], ptr [[Q1]], align 4 -; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; CHECK-NEXT: [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]] -; CHECK-NEXT: [[X2:%.*]] = load i32, ptr [[Q2]], align 4 -; CHECK-NEXT: [[Y2:%.*]] = add i32 [[X2]], 3 -; CHECK-NEXT: store i32 [[Y2]], ptr [[Q2]], align 4 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -410,29 +286,7 @@ define void @load_store_factor3_i32(ptr %p) { ; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; FIXED-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 -; FIXED-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 -; FIXED-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2 -; FIXED-NEXT: store i32 [[Y1]], ptr [[Q1]], align 4 -; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; FIXED-NEXT: [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]] -; FIXED-NEXT: [[X2:%.*]] = load i32, ptr [[Q2]], align 4 -; FIXED-NEXT: [[Y2:%.*]] = add i32 [[X2]], 3 -; FIXED-NEXT: store i32 [[Y2]], ptr [[Q2]], align 4 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -465,29 +319,7 @@ define void @load_store_factor3_i32(ptr %p) { ; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; SCALABLE-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 -; SCALABLE-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 -; SCALABLE-NEXT: [[Y1:%.*]] = add i32 [[X1]], 2 -; SCALABLE-NEXT: store i32 [[Y1]], ptr [[Q1]], align 4 -; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET2]] -; SCALABLE-NEXT: [[X2:%.*]] = load i32, ptr [[Q2]], align 4 -; SCALABLE-NEXT: [[Y2:%.*]] = add i32 [[X2]], 3 -; SCALABLE-NEXT: store i32 [[Y2]], ptr [[Q2]], align 4 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -551,29 +383,7 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -602,29 +412,7 @@ define void @load_store_factor3_i64(ptr %p) { ; FIXED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -657,29 +445,7 @@ define void @load_store_factor3_i64(ptr %p) { ; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 3 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -745,34 +511,7 @@ define void @load_store_factor4(ptr %p) { ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 4 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; CHECK-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; CHECK-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; CHECK-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -803,34 +542,7 @@ define void @load_store_factor4(ptr %p) { ; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 4 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -865,34 +577,7 @@ define void @load_store_factor4(ptr %p) { ; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 4 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -966,39 +651,7 @@ define void @load_store_factor5(ptr %p) { ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 5 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; CHECK-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; CHECK-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; CHECK-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; CHECK-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; CHECK-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; CHECK-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; CHECK-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; CHECK-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1033,39 +686,7 @@ define void @load_store_factor5(ptr %p) { ; FIXED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 5 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; FIXED-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; FIXED-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; FIXED-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; FIXED-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; FIXED-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -1102,39 +723,7 @@ define void @load_store_factor5(ptr %p) { ; SCALABLE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 5 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; SCALABLE-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; SCALABLE-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; SCALABLE-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; SCALABLE-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -1216,44 +805,7 @@ define void @load_store_factor6(ptr %p) { ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 6 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; CHECK-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; CHECK-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; CHECK-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; CHECK-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; CHECK-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; CHECK-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; CHECK-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; CHECK-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; CHECK-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1 -; CHECK-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]] -; CHECK-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8 -; CHECK-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6 -; CHECK-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1291,44 +843,7 @@ define void @load_store_factor6(ptr %p) { ; FIXED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 6 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; FIXED-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; FIXED-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; FIXED-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; FIXED-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; FIXED-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; FIXED-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1 -; FIXED-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]] -; FIXED-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8 -; FIXED-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6 -; FIXED-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -1367,44 +882,7 @@ define void @load_store_factor6(ptr %p) { ; SCALABLE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 6 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; SCALABLE-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; SCALABLE-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; SCALABLE-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; SCALABLE-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; SCALABLE-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1 -; SCALABLE-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]] -; SCALABLE-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8 -; SCALABLE-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6 -; SCALABLE-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -1494,49 +972,7 @@ define void @load_store_factor7(ptr %p) { ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 7 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; CHECK-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; CHECK-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; CHECK-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; CHECK-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; CHECK-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; CHECK-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; CHECK-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; CHECK-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; CHECK-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1 -; CHECK-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]] -; CHECK-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8 -; CHECK-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6 -; CHECK-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8 -; CHECK-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1 -; CHECK-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]] -; CHECK-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8 -; CHECK-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7 -; CHECK-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1578,49 +1014,7 @@ define void @load_store_factor7(ptr %p) { ; FIXED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 7 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; FIXED-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; FIXED-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; FIXED-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; FIXED-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; FIXED-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; FIXED-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1 -; FIXED-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]] -; FIXED-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8 -; FIXED-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6 -; FIXED-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8 -; FIXED-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1 -; FIXED-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]] -; FIXED-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8 -; FIXED-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7 -; FIXED-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -1661,49 +1055,7 @@ define void @load_store_factor7(ptr %p) { ; SCALABLE-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = mul i64 [[I]], 7 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; SCALABLE-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; SCALABLE-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; SCALABLE-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; SCALABLE-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; SCALABLE-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1 -; SCALABLE-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]] -; SCALABLE-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8 -; SCALABLE-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6 -; SCALABLE-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8 -; SCALABLE-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1 -; SCALABLE-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]] -; SCALABLE-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8 -; SCALABLE-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7 -; SCALABLE-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -1801,54 +1153,7 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; CHECK-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; CHECK-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; CHECK-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; CHECK-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; CHECK-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; CHECK-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; CHECK-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; CHECK-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; CHECK-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; CHECK-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; CHECK-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; CHECK-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; CHECK-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; CHECK-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; CHECK-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; CHECK-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; CHECK-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; CHECK-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; CHECK-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; CHECK-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1 -; CHECK-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]] -; CHECK-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8 -; CHECK-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6 -; CHECK-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8 -; CHECK-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1 -; CHECK-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]] -; CHECK-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8 -; CHECK-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7 -; CHECK-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8 -; CHECK-NEXT: [[OFFSET7:%.*]] = add i64 [[OFFSET6]], 1 -; CHECK-NEXT: [[Q7:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET7]] -; CHECK-NEXT: [[X7:%.*]] = load i64, ptr [[Q7]], align 8 -; CHECK-NEXT: [[Y7:%.*]] = add i64 [[X7]], 8 -; CHECK-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1891,54 +1196,7 @@ define void @load_store_factor8(ptr %p) { ; FIXED-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; FIXED-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; FIXED-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; FIXED-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; FIXED-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; FIXED-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; FIXED-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; FIXED-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; FIXED-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; FIXED-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; FIXED-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; FIXED-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; FIXED-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; FIXED-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; FIXED-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; FIXED-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; FIXED-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; FIXED-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; FIXED-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; FIXED-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; FIXED-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1 -; FIXED-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]] -; FIXED-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8 -; FIXED-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6 -; FIXED-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8 -; FIXED-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1 -; FIXED-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]] -; FIXED-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8 -; FIXED-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7 -; FIXED-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8 -; FIXED-NEXT: [[OFFSET7:%.*]] = add i64 [[OFFSET6]], 1 -; FIXED-NEXT: [[Q7:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET7]] -; FIXED-NEXT: [[X7:%.*]] = load i64, ptr [[Q7]], align 8 -; FIXED-NEXT: [[Y7:%.*]] = add i64 [[X7]], 8 -; FIXED-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -1981,54 +1239,7 @@ define void @load_store_factor8(ptr %p) { ; SCALABLE-NEXT: [[TMP25:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 3 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[Y0:%.*]] = add i64 [[X0]], 1 -; SCALABLE-NEXT: store i64 [[Y0]], ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[Y1:%.*]] = add i64 [[X1]], 2 -; SCALABLE-NEXT: store i64 [[Y1]], ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[OFFSET2:%.*]] = add i64 [[OFFSET1]], 1 -; SCALABLE-NEXT: [[Q2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET2]] -; SCALABLE-NEXT: [[X2:%.*]] = load i64, ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[Y2:%.*]] = add i64 [[X2]], 3 -; SCALABLE-NEXT: store i64 [[Y2]], ptr [[Q2]], align 8 -; SCALABLE-NEXT: [[OFFSET3:%.*]] = add i64 [[OFFSET2]], 1 -; SCALABLE-NEXT: [[Q3:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET3]] -; SCALABLE-NEXT: [[X3:%.*]] = load i64, ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[Y3:%.*]] = add i64 [[X3]], 4 -; SCALABLE-NEXT: store i64 [[Y3]], ptr [[Q3]], align 8 -; SCALABLE-NEXT: [[OFFSET4:%.*]] = add i64 [[OFFSET3]], 1 -; SCALABLE-NEXT: [[Q4:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET4]] -; SCALABLE-NEXT: [[X4:%.*]] = load i64, ptr [[Q4]], align 8 -; SCALABLE-NEXT: [[Y4:%.*]] = add i64 [[X4]], 5 -; SCALABLE-NEXT: store i64 [[Y4]], ptr [[Q4]], align 8 -; SCALABLE-NEXT: [[OFFSET5:%.*]] = add i64 [[OFFSET4]], 1 -; SCALABLE-NEXT: [[Q5:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET5]] -; SCALABLE-NEXT: [[X5:%.*]] = load i64, ptr [[Q5]], align 8 -; SCALABLE-NEXT: [[Y5:%.*]] = add i64 [[X5]], 6 -; SCALABLE-NEXT: store i64 [[Y5]], ptr [[Q5]], align 8 -; SCALABLE-NEXT: [[OFFSET6:%.*]] = add i64 [[OFFSET5]], 1 -; SCALABLE-NEXT: [[Q6:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET6]] -; SCALABLE-NEXT: [[X6:%.*]] = load i64, ptr [[Q6]], align 8 -; SCALABLE-NEXT: [[Y6:%.*]] = add i64 [[X6]], 7 -; SCALABLE-NEXT: store i64 [[Y6]], ptr [[Q6]], align 8 -; SCALABLE-NEXT: [[OFFSET7:%.*]] = add i64 [[OFFSET6]], 1 -; SCALABLE-NEXT: [[Q7:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET7]] -; SCALABLE-NEXT: [[X7:%.*]] = load i64, ptr [[Q7]], align 8 -; SCALABLE-NEXT: [[Y7:%.*]] = add i64 [[X7]], 8 -; SCALABLE-NEXT: store i64 [[Y7]], ptr [[Q7]], align 8 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -2118,23 +1329,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 -; CHECK-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]] -; CHECK-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]] -; CHECK-NEXT: store i32 [[RES]], ptr [[DST]], align 4 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -2157,23 +1352,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; FIXED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 -; FIXED-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]] -; FIXED-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]] -; FIXED-NEXT: store i32 [[RES]], ptr [[DST]], align 4 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -2202,23 +1381,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 -; SCALABLE-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]] -; SCALABLE-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]] -; SCALABLE-NEXT: store i32 [[RES]], ptr [[DST]], align 4 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; @@ -2273,23 +1436,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; CHECK-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]] -; CHECK-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]] -; CHECK-NEXT: store i64 [[RES]], ptr [[DST]], align 8 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -2312,23 +1459,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; FIXED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; FIXED: middle.block: -; FIXED-NEXT: br label [[EXIT:%.*]] -; FIXED: scalar.ph: ; FIXED-NEXT: br label [[LOOP:%.*]] -; FIXED: loop: -; FIXED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; FIXED-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; FIXED-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; FIXED-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; FIXED-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; FIXED-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; FIXED-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; FIXED-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]] -; FIXED-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]] -; FIXED-NEXT: store i64 [[RES]], ptr [[DST]], align 8 -; FIXED-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; FIXED-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; FIXED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; FIXED: exit: ; FIXED-NEXT: ret void ; @@ -2357,23 +1488,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: br label [[EXIT:%.*]] -; SCALABLE: scalar.ph: ; SCALABLE-NEXT: br label [[LOOP:%.*]] -; SCALABLE: loop: -; SCALABLE-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; SCALABLE-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; SCALABLE-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] -; SCALABLE-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 8 -; SCALABLE-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 -; SCALABLE-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] -; SCALABLE-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 8 -; SCALABLE-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]] -; SCALABLE-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]] -; SCALABLE-NEXT: store i64 [[RES]], ptr [[DST]], align 8 -; SCALABLE-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; SCALABLE-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; SCALABLE-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; SCALABLE: exit: ; SCALABLE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll index a30aebb..ef0f0cf 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll @@ -96,7 +96,8 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_DATA: middle.block: ; PREDICATED_DATA-NEXT: br label [[FOR_END:%.*]] -; PREDICATED_DATA: scalar.ph: +; PREDICATED_DATA: for.end: +; PREDICATED_DATA-NEXT: ret void ; ; PREDICATED_DATA-WITH-EVL-LABEL: define void @masked_strided_factor2 ; PREDICATED_DATA-WITH-EVL-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i8 zeroext [[GUARD:%.*]]) #[[ATTR0:[0-9]+]] { @@ -135,9 +136,13 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP10]], <vscale x 32 x i1> [[INTERLEAVED_MASK4]], i32 [[INTERLEAVE_EVL3]]) ; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]] +; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP12:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 +; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_DATA-WITH-EVL: middle.block: ; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]] -; PREDICATED_DATA-WITH-EVL: scalar.ph: +; PREDICATED_DATA-WITH-EVL: for.end: +; PREDICATED_DATA-WITH-EVL-NEXT: ret void ; entry: %conv = zext i8 %guard to i32 @@ -270,10 +275,11 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP1]] ; PREDICATED_DATA-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_DATA-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; PREDICATED_DATA-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; PREDICATED_DATA-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; PREDICATED_DATA: middle.block: ; PREDICATED_DATA-NEXT: br label [[FOR_END:%.*]] -; PREDICATED_DATA: scalar.ph: +; PREDICATED_DATA: for.end: +; PREDICATED_DATA-NEXT: ret void ; ; PREDICATED_DATA-WITH-EVL-LABEL: define void @masked_strided_factor4 ; PREDICATED_DATA-WITH-EVL-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i8 zeroext [[GUARD:%.*]]) #[[ATTR0]] { @@ -316,9 +322,13 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_DATA-WITH-EVL-NEXT: call void @llvm.vp.store.nxv64i8.p0(<vscale x 64 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP15]], <vscale x 64 x i1> [[INTERLEAVED_MASK4]], i32 [[INTERLEAVE_EVL3]]) ; PREDICATED_DATA-WITH-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP1]], [[EVL_BASED_IV]] ; PREDICATED_DATA-WITH-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP1]] +; PREDICATED_DATA-WITH-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 16 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]] +; PREDICATED_DATA-WITH-EVL-NEXT: [[TMP16:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 +; PREDICATED_DATA-WITH-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PREDICATED_DATA-WITH-EVL: middle.block: ; PREDICATED_DATA-WITH-EVL-NEXT: br label [[FOR_END:%.*]] -; PREDICATED_DATA-WITH-EVL: scalar.ph: +; PREDICATED_DATA-WITH-EVL: for.end: +; PREDICATED_DATA-WITH-EVL-NEXT: ret void ; entry: %conv = zext i8 %guard to i32 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll index cf2f78b..328ee16 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -62,18 +62,7 @@ define void @load_store(ptr %p) { ; LMUL2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; LMUL2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL2: middle.block: -; LMUL2-NEXT: br label [[FOR_END:%.*]] -; LMUL2: scalar.ph: ; LMUL2-NEXT: br label [[FOR_BODY:%.*]] -; LMUL2: for.body: -; LMUL2-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; LMUL2-NEXT: [[Q:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[IV]] -; LMUL2-NEXT: [[V:%.*]] = load i64, ptr [[Q]], align 8 -; LMUL2-NEXT: [[W:%.*]] = add i64 [[V]], 1 -; LMUL2-NEXT: store i64 [[W]], ptr [[Q]], align 8 -; LMUL2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; LMUL2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; LMUL2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; LMUL2: for.end: ; LMUL2-NEXT: ret void ; @@ -96,18 +85,7 @@ define void @load_store(ptr %p) { ; LMUL4-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; LMUL4-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL4: middle.block: -; LMUL4-NEXT: br label [[FOR_END:%.*]] -; LMUL4: scalar.ph: ; LMUL4-NEXT: br label [[FOR_BODY:%.*]] -; LMUL4: for.body: -; LMUL4-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; LMUL4-NEXT: [[Q:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[IV]] -; LMUL4-NEXT: [[V:%.*]] = load i64, ptr [[Q]], align 8 -; LMUL4-NEXT: [[W:%.*]] = add i64 [[V]], 1 -; LMUL4-NEXT: store i64 [[W]], ptr [[Q]], align 8 -; LMUL4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; LMUL4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; LMUL4-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; LMUL4: for.end: ; LMUL4-NEXT: ret void ; @@ -130,18 +108,7 @@ define void @load_store(ptr %p) { ; LMUL8-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; LMUL8-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL8: middle.block: -; LMUL8-NEXT: br label [[FOR_END:%.*]] -; LMUL8: scalar.ph: ; LMUL8-NEXT: br label [[FOR_BODY:%.*]] -; LMUL8: for.body: -; LMUL8-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; LMUL8-NEXT: [[Q:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[IV]] -; LMUL8-NEXT: [[V:%.*]] = load i64, ptr [[Q]], align 8 -; LMUL8-NEXT: [[W:%.*]] = add i64 [[V]], 1 -; LMUL8-NEXT: store i64 [[W]], ptr [[Q]], align 8 -; LMUL8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; LMUL8-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; LMUL8-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; LMUL8: for.end: ; LMUL8-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index 53907fa..8ef53ca 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -133,21 +133,7 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP7]], ptr align 1 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]) ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[I_08]] -; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[I_08]] -; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]] -; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 8 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -186,21 +172,7 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr align 1 [[TMP4]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]]) ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP7]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[I_08]] -; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]] -; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -240,21 +212,7 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP11]], ptr align 1 [[TMP4]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]]) ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP7]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[I_08]] -; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP8]] -; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 32 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -293,21 +251,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP7]], ptr align 1 [[DST]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP5]]) ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]] -; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP8]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]] -; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP9]] -; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 24 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index ae6c90c..06b47aa 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -40,25 +40,7 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLENUNK: middle.block: -; VLENUNK-NEXT: br label [[FOR_END:%.*]] -; VLENUNK: scalar.ph: -; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] -; VLENUNK: for.body: -; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; VLENUNK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[IV]], 512 -; VLENUNK-NEXT: br i1 [[ICMP]], label [[DO_LOAD:%.*]], label [[LATCH]] -; VLENUNK: do_load: -; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; VLENUNK-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; VLENUNK-NEXT: br label [[LATCH]] -; VLENUNK: latch: -; VLENUNK-NEXT: [[PHI:%.*]] = phi i32 [ [[ELEM]], [[DO_LOAD]] ], [ 0, [[FOR_BODY]] ] -; VLENUNK-NEXT: [[ADD:%.*]] = add i32 [[PHI]], [[V]] -; VLENUNK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; VLENUNK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 -; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; VLENUNK-NEXT: br label [[LATCH:%.*]] ; VLENUNK: for.end: ; VLENUNK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll index e0bd8aa..0a9b1e0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll @@ -108,7 +108,8 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP13]], [[TMP12]] ; FIXED-V-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX]]) ; FIXED-V-NEXT: br label [[FOR_EXIT:%.*]] -; FIXED-V: scalar.ph: +; FIXED-V: for.exit: +; FIXED-V-NEXT: ret i32 [[TMP15]] ; ; FIXED-ZVQDOTQ-LABEL: define i32 @vqdot( ; FIXED-ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { @@ -143,7 +144,8 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]] ; FIXED-ZVQDOTQ-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]]) ; FIXED-ZVQDOTQ-NEXT: br label [[FOR_EXIT:%.*]] -; FIXED-ZVQDOTQ: scalar.ph: +; FIXED-ZVQDOTQ: for.exit: +; FIXED-ZVQDOTQ-NEXT: ret i32 [[TMP13]] ; entry: br label %for.body @@ -263,12 +265,13 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[TMP13]] = add <8 x i32> [[TMP11]], [[VEC_PHI1]] ; FIXED-V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; FIXED-V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; FIXED-V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FIXED-V: middle.block: ; FIXED-V-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP13]], [[TMP12]] ; FIXED-V-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX]]) ; FIXED-V-NEXT: br label [[FOR_EXIT:%.*]] -; FIXED-V: scalar.ph: +; FIXED-V: for.exit: +; FIXED-V-NEXT: ret i32 [[TMP15]] ; ; FIXED-ZVQDOTQ-LABEL: define i32 @vqdotu( ; FIXED-ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -298,12 +301,13 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]]) ; FIXED-ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; FIXED-ZVQDOTQ-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FIXED-ZVQDOTQ: middle.block: ; FIXED-ZVQDOTQ-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]] ; FIXED-ZVQDOTQ-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]]) ; FIXED-ZVQDOTQ-NEXT: br label [[FOR_EXIT:%.*]] -; FIXED-ZVQDOTQ: scalar.ph: +; FIXED-ZVQDOTQ: for.exit: +; FIXED-ZVQDOTQ-NEXT: ret i32 [[TMP13]] ; entry: br label %for.body @@ -423,12 +427,13 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[TMP13]] = add <8 x i32> [[TMP11]], [[VEC_PHI1]] ; FIXED-V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; FIXED-V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; FIXED-V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FIXED-V: middle.block: ; FIXED-V-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP13]], [[TMP12]] ; FIXED-V-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX]]) ; FIXED-V-NEXT: br label [[FOR_EXIT:%.*]] -; FIXED-V: scalar.ph: +; FIXED-V: for.exit: +; FIXED-V-NEXT: ret i32 [[TMP15]] ; ; FIXED-ZVQDOTQ-LABEL: define i32 @vqdotsu( ; FIXED-ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -458,12 +463,13 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]]) ; FIXED-ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; FIXED-ZVQDOTQ-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FIXED-ZVQDOTQ: middle.block: ; FIXED-ZVQDOTQ-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]] ; FIXED-ZVQDOTQ-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]]) ; FIXED-ZVQDOTQ-NEXT: br label [[FOR_EXIT:%.*]] -; FIXED-ZVQDOTQ: scalar.ph: +; FIXED-ZVQDOTQ: for.exit: +; FIXED-ZVQDOTQ-NEXT: ret i32 [[TMP13]] ; entry: br label %for.body @@ -582,12 +588,13 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; FIXED-V-NEXT: [[TMP13]] = add <8 x i32> [[TMP11]], [[VEC_PHI1]] ; FIXED-V-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; FIXED-V-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; FIXED-V-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FIXED-V: middle.block: ; FIXED-V-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP13]], [[TMP12]] ; FIXED-V-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX]]) ; FIXED-V-NEXT: br label [[FOR_EXIT:%.*]] -; FIXED-V: scalar.ph: +; FIXED-V: for.exit: +; FIXED-V-NEXT: ret i32 [[TMP15]] ; ; FIXED-ZVQDOTQ-LABEL: define i32 @vqdotsu2( ; FIXED-ZVQDOTQ-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { @@ -617,12 +624,13 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ-NEXT: [[PARTIAL_REDUCE5]] = call <2 x i32> @llvm.vector.partial.reduce.add.v2i32.v8i32(<2 x i32> [[VEC_PHI1]], <8 x i32> [[TMP11]]) ; FIXED-ZVQDOTQ-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; FIXED-ZVQDOTQ-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; FIXED-ZVQDOTQ-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FIXED-ZVQDOTQ: middle.block: ; FIXED-ZVQDOTQ-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[PARTIAL_REDUCE5]], [[PARTIAL_REDUCE]] ; FIXED-ZVQDOTQ-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]]) ; FIXED-ZVQDOTQ-NEXT: br label [[FOR_EXIT:%.*]] -; FIXED-ZVQDOTQ: scalar.ph: +; FIXED-ZVQDOTQ: for.exit: +; FIXED-ZVQDOTQ-NEXT: ret i32 [[TMP13]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index 782c2f6..65928f8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -49,30 +49,7 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C_1:%.*]] = icmp ule i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[C_1]], label [[THEN_1:%.*]], label [[ELSE_1:%.*]] -; CHECK: then.1: -; CHECK-NEXT: [[C_2:%.*]] = icmp ule i64 [[IV]], [[B]] -; CHECK-NEXT: br i1 [[C_2]], label [[ELSE_1]], label [[MERGE:%.*]] -; CHECK: else.1: -; CHECK-NEXT: [[C_3:%.*]] = icmp ule i64 [[IV]], [[C]] -; CHECK-NEXT: br i1 [[C_3]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] -; CHECK: then.2: -; CHECK-NEXT: br label [[MERGE]] -; CHECK: merge: -; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ poison, [[THEN_1]] ], [ [[IV]], [[THEN_2]] ] -; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[IDX]] -; CHECK-NEXT: store i16 0, ptr [[GETELEMENTPTR]], align 2 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i64 [[IV]], 1000 -; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index 3739f85..8d4d282 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -37,27 +37,7 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT1:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_COND1:%.*]] -; CHECK: for.cond: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH1:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[ADD]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2 -; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48 -; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SHL]], 52 -; CHECK-NEXT: [[TRUNC_I32:%.*]] = trunc i64 [[ASHR]] to i32 -; CHECK-NEXT: br i1 [[CMP_SLT]], label [[COND_FALSE:%.*]], label [[FOR_BODY]] -; CHECK: cond.false: -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32 -; CHECK-NEXT: br label [[FOR_BODY]] -; CHECK: for.body: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], [[FOR_COND1]] ], [ [[ZEXT]], [[COND_FALSE]] ] -; CHECK-NEXT: [[SHL_I32:%.*]] = shl i32 [[COND]], 8 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8 -; CHECK-NEXT: store i8 [[TRUNC]], ptr [[P]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 8 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1]], label [[EXIT1]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll index 9b6bc68..735fb769 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll @@ -29,20 +29,8 @@ define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ 2, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP10]], [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ADD_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP11]] ; entry: br label %for.body @@ -85,20 +73,8 @@ define i32 @sub(ptr %a, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP3]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 1024, %[[SCALAR_PH]] ], [ [[SUB:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[SUB]] = sub i32 [[RDX]], [[X]] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUB_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: br label %loop @@ -144,23 +120,8 @@ define i32 @addsub(ptr %a, ptr %b, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP5]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[SUB:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[RDX]], [[X]] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[SUB]] = sub i32 [[ADD]], [[Y]] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUB_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP8]] ; entry: br label %loop @@ -209,20 +170,8 @@ define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ 2, %[[SCALAR_PH]] ], [ [[OR:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[OR]] = or i32 [[TMP10]], [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], %[[FOR_BODY]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[OR_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP11]] ; entry: br label %for.body @@ -267,20 +216,8 @@ define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ 2, %[[SCALAR_PH]] ], [ [[AND:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[AND]] = and i32 [[TMP10]], [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], %[[FOR_BODY]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[AND_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP11]] ; entry: br label %for.body @@ -325,20 +262,8 @@ define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ 2, %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[XOR]] = xor i32 [[TMP10]], [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], %[[FOR_BODY]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[XOR_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP11]] ; entry: br label %for.body @@ -384,21 +309,8 @@ define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_010:%.*]] = phi i32 [ 2, %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP11]], [[SUM_010]] -; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[SUM_010]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi i32 [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[DOTSROA_SPECULATED_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP12]] ; entry: br label %for.body @@ -445,21 +357,8 @@ define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_010:%.*]] = phi i32 [ 2, %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[TMP11]], [[SUM_010]] -; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[SUM_010]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi i32 [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[DOTSROA_SPECULATED_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP12]] ; entry: br label %for.body @@ -505,20 +404,8 @@ define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = fadd fast float [[TMP10]], [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[ADD_LCSSA]] +; CHECK-NEXT: ret float [[TMP11]] ; entry: br label %for.body @@ -561,20 +448,8 @@ define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "tar ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP11:%.*]] = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> [[TMP8]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ 0xH0000, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load half, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = fadd fast half [[TMP10]], [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi half [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret half [[ADD_LCSSA]] +; CHECK-NEXT: ret half [[TMP11]] ; entry: br label %for.body @@ -744,21 +619,8 @@ define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt float [[TMP11]], [[SUM_07]] -; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], float [[TMP11]], float [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi float [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[DOTSROA_SPECULATED_LCSSA]] +; CHECK-NEXT: ret float [[TMP12]] ; entry: br label %for.body @@ -803,21 +665,8 @@ define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) # ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ 0xH0000, %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt half [[TMP11]], [[SUM_07]] -; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], half [[TMP11]], half [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi half [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret half [[DOTSROA_SPECULATED_LCSSA]] +; CHECK-NEXT: ret half [[TMP12]] ; entry: br label %for.body @@ -862,21 +711,8 @@ define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call bfloat @llvm.vector.reduce.fmin.nxv8bf16(<vscale x 8 x bfloat> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ 0xR0000, %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt bfloat [[TMP11]], [[SUM_07]] -; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], bfloat [[TMP11]], bfloat [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi bfloat [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret bfloat [[DOTSROA_SPECULATED_LCSSA]] +; CHECK-NEXT: ret bfloat [[TMP12]] ; entry: br label %for.body @@ -923,21 +759,8 @@ define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast ogt float [[TMP11]], [[SUM_07]] -; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], float [[TMP11]], float [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi float [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[DOTSROA_SPECULATED_LCSSA]] +; CHECK-NEXT: ret float [[TMP12]] ; entry: br label %for.body @@ -982,21 +805,8 @@ define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) # ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call fast half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ 0xH0000, %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast ogt half [[TMP11]], [[SUM_07]] -; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], half [[TMP11]], half [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi half [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret half [[DOTSROA_SPECULATED_LCSSA]] +; CHECK-NEXT: ret half [[TMP12]] ; entry: br label %for.body @@ -1041,21 +851,8 @@ define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP12:%.*]] = call fast bfloat @llvm.vector.reduce.fmax.nxv8bf16(<vscale x 8 x bfloat> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ 0xR0000, %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast ogt bfloat [[TMP11]], [[SUM_07]] -; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], bfloat [[TMP11]], bfloat [[SUM_07]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi bfloat [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret bfloat [[DOTSROA_SPECULATED_LCSSA]] +; CHECK-NEXT: ret bfloat [[TMP12]] ; entry: br label %for.body @@ -1243,22 +1040,8 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP11]], float [[TMP12]], float [[SUM_07]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP16]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[MULADD_LCSSA]] +; CHECK-NEXT: ret float [[TMP16]] ; entry: br label %for.body @@ -1305,22 +1088,8 @@ define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh" ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc half @llvm.vector.reduce.fadd.nxv8f16(half 0xH8000, <vscale x 8 x half> [[TMP9]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ 0xH0000, %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load half, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[MULADD]] = tail call reassoc half @llvm.fmuladd.f16(half [[TMP11]], half [[TMP12]], half [[SUM_07]]) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi half [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP16]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret half [[MULADD_LCSSA]] +; CHECK-NEXT: ret half [[TMP16]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll index 93c0a74..850a6cb 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll @@ -58,36 +58,6 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1 -; CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]] -; CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1 -; CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1 -; CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]] -; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1 -; CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1 -; CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]] -; CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1 -; CHECK-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV]], 3 -; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]] -; CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0 -; CHECK-NEXT: store i8 [[A]], ptr [[A_GEP1]], align 1 -; CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1 -; CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1 -; CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2 -; CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2 -; CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3 -; CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -145,36 +115,6 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) { ; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; NO-REG-PRESSURE-CHECK: [[MIDDLE_BLOCK]]: ; NO-REG-PRESSURE-CHECK-NEXT: br label %[[EXIT:.*]] -; NO-REG-PRESSURE-CHECK: [[SCALAR_PH:.*]]: -; NO-REG-PRESSURE-CHECK-NEXT: br label %[[LOOP:.*]] -; NO-REG-PRESSURE-CHECK: [[LOOP]]: -; NO-REG-PRESSURE-CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ] -; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ] -; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ] -; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]] -; NO-REG-PRESSURE-CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]] -; NO-REG-PRESSURE-CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]] -; NO-REG-PRESSURE-CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV]], 3 -; NO-REG-PRESSURE-CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]] -; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0 -; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[A]], ptr [[A_GEP1]], align 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1 -; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2 -; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2 -; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3 -; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4 -; NO-REG-PRESSURE-CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024 -; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; NO-REG-PRESSURE-CHECK: [[EXIT]]: ; NO-REG-PRESSURE-CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll index 7b8404a..b80368d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/remark-reductions.ll @@ -21,18 +21,8 @@ define float @s311(float %a_0, float %s311_sum) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ [[S311_SUM]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED_NEXT]] = fadd float [[A_0]], [[RED]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1200 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RED_LCSSA]] +; CHECK-NEXT: ret float [[TMP6]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index a165dde..5ca9bfd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -53,10 +53,9 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV64-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: -; RV64-NEXT: br [[EXIT:label %.*]] -; RV64: [[SCALAR_PH:.*:]] -; RV64-NEXT: br label %[[FOR_BODY:.*]] -; RV64: [[FOR_BODY]]: +; RV64-NEXT: br label %[[EXIT:.*]] +; RV64: [[EXIT]]: +; RV64-NEXT: ret void ; ; RV32-LABEL: define void @vector_reverse_i32( ; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { @@ -93,10 +92,9 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV32-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; RV32: [[MIDDLE_BLOCK]]: -; RV32-NEXT: br [[EXIT:label %.*]] -; RV32: [[SCALAR_PH:.*:]] -; RV32-NEXT: br label %[[FOR_BODY:.*]] -; RV32: [[FOR_BODY]]: +; RV32-NEXT: br label %[[EXIT:.*]] +; RV32: [[EXIT]]: +; RV32-NEXT: ret void ; ; RV64-UF2-LABEL: define void @vector_reverse_i32( ; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { @@ -718,10 +716,9 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV64-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: -; RV64-NEXT: br [[EXIT:label %.*]] -; RV64: [[SCALAR_PH:.*:]] -; RV64-NEXT: br label %[[FOR_BODY:.*]] -; RV64: [[FOR_BODY]]: +; RV64-NEXT: br label %[[EXIT:.*]] +; RV64: [[EXIT]]: +; RV64-NEXT: ret void ; ; RV32-LABEL: define void @vector_reverse_f32_simplify( ; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { @@ -758,10 +755,9 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV32-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; RV32-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; RV32: [[MIDDLE_BLOCK]]: -; RV32-NEXT: br [[EXIT:label %.*]] -; RV32: [[SCALAR_PH:.*:]] -; RV32-NEXT: br label %[[FOR_BODY:.*]] -; RV32: [[FOR_BODY]]: +; RV32-NEXT: br label %[[EXIT:.*]] +; RV32: [[EXIT]]: +; RV32-NEXT: ret void ; ; RV64-UF2-LABEL: define void @vector_reverse_f32_simplify( ; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll index ecde164..e046816 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll @@ -28,19 +28,7 @@ define void @test(ptr %p) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 200 -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -81,19 +69,7 @@ define void @test_may_clobber(ptr %p) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100 -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -137,19 +113,7 @@ define void @trivial_due_max_vscale(ptr %p) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 8192 -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -193,19 +157,7 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; CHECK-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 1024 -; CHECK-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll index 544ddc5..7330ce6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll @@ -27,18 +27,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] -; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -84,18 +73,7 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -179,18 +157,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 -; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] -; CHECK-NEXT: store i64 [[V]], ptr [[AADDR]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -235,23 +202,9 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP9]]) -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 -; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]] -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i64 [[TMP11]] ; entry: br label %for.body @@ -292,16 +245,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -340,16 +284,7 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index a596c63..3c90908 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -28,18 +28,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] -; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -84,18 +73,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 -; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] -; CHECK-NEXT: store i64 [[V]], ptr [[AADDR]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -140,23 +118,9 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP11]]) -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8 -; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]] -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i64 [[TMP14]] ; entry: br label %for.body @@ -197,16 +161,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -246,17 +201,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: store i64 [[V]], ptr [[B]], align 8 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -356,18 +301,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] -; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index 4bfe9a4..8971b0c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -29,21 +29,8 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) { ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[Y]], i32 0 ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[A:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP12]], [[X]] -; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[COND_LCSSA]] +; CHECK-NEXT: ret i32 [[RDX_SELECT]] ; entry: br label %for.body @@ -91,21 +78,8 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) { ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[Y]], i32 0 ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[A:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[TMP12]], [[X]] -; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[COND_LCSSA]] +; CHECK-NEXT: ret i32 [[RDX_SELECT]] ; entry: br label %for.body @@ -151,21 +125,8 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 7, i32 3 ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ 3, %[[SCALAR_PH]] ], [ [[TMP17:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 3 -; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], i32 [[TMP13]], i32 7 -; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP12]], 1 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[N]] -; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[DOTLCSSA]] +; CHECK-NEXT: ret i32 [[RDX_SELECT]] ; entry: br label %for.body @@ -211,21 +172,8 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[B]], i32 [[A]] ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[A]], %[[SCALAR_PH]] ], [ [[TMP17:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 3 -; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], i32 [[TMP13]], i32 [[B]] -; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP12]], 1 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[N]] -; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[DOTLCSSA]] +; CHECK-NEXT: ret i32 [[RDX_SELECT]] ; entry: br label %for.body @@ -271,21 +219,8 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 1, i32 2 ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ 2, %[[SCALAR_PH]] ], [ [[TMP17:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast ueq float [[TMP15]], 3.000000e+00 -; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], i32 [[TMP13]], i32 1 -; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP12]], 1 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[N]] -; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[DOTLCSSA]] +; CHECK-NEXT: ret i32 [[RDX_SELECT]] ; entry: br label %for.body @@ -373,29 +308,8 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 1, i32 0 ; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %[[FOR_INC]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP14]], 35 -; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]] -; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[I_013]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP15]], 2 -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] -; CHECK-NEXT: br label %[[FOR_INC]] -; CHECK: [[FOR_INC]]: -; CHECK-NEXT: [[R_1]] = phi i32 [ [[R_012]], %[[FOR_BODY]] ], [ [[SPEC_SELECT]], %[[IF_THEN]] ] -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]] ; CHECK: [[FOR_END_LOOPEXIT]]: -; CHECK-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], %[[FOR_INC]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[R_1_LCSSA]] +; CHECK-NEXT: ret i32 [[RDX_SELECT]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index ca1c710..2fbc73e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -31,19 +31,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[SCALAR_PH:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH1:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], 8 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]] -; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 -; CHECK-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[SCALAR_PH]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -147,20 +135,7 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[OFFSET:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]] -; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 -; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 -; CHECK-NEXT: store i32 [[Y0]], ptr [[Q0]], align 4 -; CHECK-NEXT: [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], 64 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -264,19 +239,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[SCALAR_PH:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH1:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[P]], [[SCALAR_PH1]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[X0:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: [[Y0:%.*]] = add i32 [[X0]], 1 -; CHECK-NEXT: store i32 [[Y0]], ptr [[PTR]], align 4 -; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 8 -; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[SCALAR_PH]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1357,18 +1320,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; NOSTRIDED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; NOSTRIDED-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; NOSTRIDED: middle.block: -; NOSTRIDED-NEXT: br label [[EXIT:%.*]] -; NOSTRIDED: scalar.ph: ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] -; NOSTRIDED: loop: -; NOSTRIDED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; NOSTRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[IN]], i64 [[IV]] -; NOSTRIDED-NEXT: [[TMP8:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; NOSTRIDED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i64, ptr [[OUT]], i64 [[IV]] -; NOSTRIDED-NEXT: store i64 [[TMP8]], ptr [[ARRAYIDX2]], align 8 -; NOSTRIDED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; NOSTRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; NOSTRIDED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; NOSTRIDED: exit: ; NOSTRIDED-NEXT: ret void ; @@ -1452,18 +1404,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; STRIDED-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; STRIDED: middle.block: -; STRIDED-NEXT: br label [[EXIT:%.*]] -; STRIDED: scalar.ph: ; STRIDED-NEXT: br label [[LOOP:%.*]] -; STRIDED: loop: -; STRIDED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; STRIDED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[IN]], i64 [[IV]] -; STRIDED-NEXT: [[TMP8:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; STRIDED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i64, ptr [[OUT]], i64 [[IV]] -; STRIDED-NEXT: store i64 [[TMP8]], ptr [[ARRAYIDX2]], align 8 -; STRIDED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; STRIDED-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; STRIDED-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]] ; STRIDED: exit: ; STRIDED-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index 6652fef..8ab0f6f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -1206,17 +1206,6 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] -; IF-EVL: [[SCALAR_PH:.*]]: -; IF-EVL-NEXT: br label %[[LOOP:.*]] -; IF-EVL: [[LOOP]]: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IF-EVL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[GEP]] to i64 -; IF-EVL-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: store i64 [[TMP0]], ptr [[GEP2]], align 8 -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]] ; IF-EVL: [[EXIT]]: ; IF-EVL-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll index 61f97aa..34a8275 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll @@ -43,23 +43,9 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP20]]) -; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-OUTLOOP: scalar.ph: ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-OUTLOOP: for.body: -; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP27]], 3 -; IF-EVL-OUTLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP27]], i32 0 -; IF-EVL-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] -; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-OUTLOOP: for.end: -; IF-EVL-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ] -; IF-EVL-OUTLOOP-NEXT: ret i32 [[ADD_LCSSA]] +; IF-EVL-OUTLOOP-NEXT: ret i32 [[TMP24]] ; ; IF-EVL-INLOOP-LABEL: define i32 @cond_add( ; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0:[0-9]+]] { @@ -84,23 +70,9 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-INLOOP: scalar.ph: ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-INLOOP: for.body: -; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-INLOOP-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 3 -; IF-EVL-INLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP25]], i32 0 -; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] -; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-INLOOP: for.end: -; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] -; IF-EVL-INLOOP-NEXT: ret i32 [[ADD_LCSSA]] +; IF-EVL-INLOOP-NEXT: ret i32 [[TMP22]] ; ; NO-VP-OUTLOOP-LABEL: define i32 @cond_add( ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0:[0-9]+]] { @@ -239,30 +211,12 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP23]] ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PREDPHI]]) -; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-OUTLOOP: scalar.ph: -; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-OUTLOOP: for.body: -; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[FOR_INC]] ] -; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-OUTLOOP-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP28]], 3 -; IF-EVL-OUTLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; IF-EVL-OUTLOOP: if.then: -; IF-EVL-OUTLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[RDX]], [[TMP28]] -; IF-EVL-OUTLOOP-NEXT: br label [[FOR_INC]] -; IF-EVL-OUTLOOP: for.inc: -; IF-EVL-OUTLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] -; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3]] +; IF-EVL-OUTLOOP-NEXT: br label [[FOR_INC:%.*]] ; IF-EVL-OUTLOOP: for.end: -; IF-EVL-OUTLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[FOR_INC]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] -; IF-EVL-OUTLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] +; IF-EVL-OUTLOOP-NEXT: ret i32 [[TMP27]] ; ; IF-EVL-INLOOP-LABEL: define i32 @cond_add_pred( ; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { @@ -284,29 +238,11 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP23]] ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-INLOOP: scalar.ph: -; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-INLOOP: for.body: -; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; IF-EVL-INLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[FOR_INC]] ] -; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-INLOOP-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP25]], 3 -; IF-EVL-INLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; IF-EVL-INLOOP: if.then: -; IF-EVL-INLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[RDX]], [[TMP25]] -; IF-EVL-INLOOP-NEXT: br label [[FOR_INC]] -; IF-EVL-INLOOP: for.inc: -; IF-EVL-INLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] -; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3]] +; IF-EVL-INLOOP-NEXT: br label [[FOR_INC:%.*]] ; IF-EVL-INLOOP: for.end: -; IF-EVL-INLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[FOR_INC]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] -; IF-EVL-INLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] +; IF-EVL-INLOOP-NEXT: ret i32 [[TMP22]] ; ; NO-VP-OUTLOOP-LABEL: define i32 @cond_add_pred( ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { @@ -466,27 +402,12 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]]) -; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-OUTLOOP: scalar.ph: ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-OUTLOOP: for.body: -; IF-EVL-OUTLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-OUTLOOP-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-OUTLOOP-NEXT: [[TMP37:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-OUTLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 -; IF-EVL-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP37]], [[IV_TRUNC]] -; IF-EVL-OUTLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP37]], i32 0 -; IF-EVL-OUTLOOP-NEXT: [[ADD]] = add nsw i32 [[SELECT]], [[RDX]] -; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3]] ; IF-EVL-OUTLOOP: for.end: -; IF-EVL-OUTLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP22]], [[MIDDLE_BLOCK]] ] -; IF-EVL-OUTLOOP-NEXT: ret i32 [[ADD_LCSSA]] +; IF-EVL-OUTLOOP-NEXT: ret i32 [[TMP22]] ; ; IF-EVL-INLOOP-LABEL: define i32 @step_cond_add( ; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { @@ -516,26 +437,11 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-INLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-INLOOP-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-INLOOP: scalar.ph: ; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-INLOOP: for.body: -; IF-EVL-INLOOP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-INLOOP-NEXT: [[RDX1:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ] -; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-INLOOP-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-INLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 -; IF-EVL-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP28]], [[IV_TRUNC]] -; IF-EVL-INLOOP-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP28]], i32 0 -; IF-EVL-INLOOP-NEXT: [[ADD1]] = add nsw i32 [[SELECT]], [[RDX1]] -; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3]] ; IF-EVL-INLOOP: for.end: -; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD1]], [[FOR_BODY]] ], [ [[ADD]], [[MIDDLE_BLOCK]] ] -; IF-EVL-INLOOP-NEXT: ret i32 [[ADD_LCSSA]] +; IF-EVL-INLOOP-NEXT: ret i32 [[ADD]] ; ; NO-VP-OUTLOOP-LABEL: define i32 @step_cond_add( ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { @@ -700,31 +606,12 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add <vscale x 4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT2]] ; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP24]]) -; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-OUTLOOP: scalar.ph: -; IF-EVL-OUTLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-OUTLOOP: for.body: -; IF-EVL-OUTLOOP-NEXT: [[IV1:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[MIDDLE_BLOCK:%.*]] ] -; IF-EVL-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[MIDDLE_BLOCK]] ] -; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]] -; IF-EVL-OUTLOOP-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 -; IF-EVL-OUTLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV1]] to i32 -; IF-EVL-OUTLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP38]], [[IV_TRUNC]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[MIDDLE_BLOCK]] -; IF-EVL-OUTLOOP: if.then: -; IF-EVL-OUTLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[BC_MERGE_RDX]], [[TMP38]] -; IF-EVL-OUTLOOP-NEXT: br label [[MIDDLE_BLOCK]] -; IF-EVL-OUTLOOP: for.inc: -; IF-EVL-OUTLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[BC_MERGE_RDX]], [[FOR_BODY]] ] -; IF-EVL-OUTLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 -; IF-EVL-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3]] +; IF-EVL-OUTLOOP-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; IF-EVL-OUTLOOP: for.end: -; IF-EVL-OUTLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[MIDDLE_BLOCK]] ], [ [[TMP27]], [[MIDDLE_BLOCK1]] ] -; IF-EVL-OUTLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] +; IF-EVL-OUTLOOP-NEXT: ret i32 [[TMP27]] ; ; IF-EVL-INLOOP-LABEL: define i32 @step_cond_add_pred( ; IF-EVL-INLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { @@ -753,30 +640,11 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] ; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-INLOOP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-INLOOP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL-INLOOP: middle.block: -; IF-EVL-INLOOP-NEXT: br label [[FOR_END:%.*]] -; IF-EVL-INLOOP: scalar.ph: -; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL-INLOOP: for.body: -; IF-EVL-INLOOP-NEXT: [[IV1:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[MIDDLE_BLOCK:%.*]] ] -; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[RDX_ADD:%.*]], [[MIDDLE_BLOCK]] ] -; IF-EVL-INLOOP-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]] -; IF-EVL-INLOOP-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 -; IF-EVL-INLOOP-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV1]] to i32 -; IF-EVL-INLOOP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP35]], [[IV_TRUNC]] -; IF-EVL-INLOOP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[MIDDLE_BLOCK]] -; IF-EVL-INLOOP: if.then: -; IF-EVL-INLOOP-NEXT: [[ADD_PRED:%.*]] = add nsw i32 [[BC_MERGE_RDX]], [[TMP35]] -; IF-EVL-INLOOP-NEXT: br label [[MIDDLE_BLOCK]] -; IF-EVL-INLOOP: for.inc: -; IF-EVL-INLOOP-NEXT: [[RDX_ADD]] = phi i32 [ [[ADD_PRED]], [[IF_THEN]] ], [ [[BC_MERGE_RDX]], [[FOR_BODY]] ] -; IF-EVL-INLOOP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 -; IF-EVL-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3]] +; IF-EVL-INLOOP-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; IF-EVL-INLOOP: for.end: -; IF-EVL-INLOOP-NEXT: [[RDX_ADD_LCSSA:%.*]] = phi i32 [ [[RDX_ADD]], [[MIDDLE_BLOCK]] ], [ [[TMP17]], [[MIDDLE_BLOCK1]] ] -; IF-EVL-INLOOP-NEXT: ret i32 [[RDX_ADD_LCSSA]] +; IF-EVL-INLOOP-NEXT: ret i32 [[TMP17]] ; ; NO-VP-OUTLOOP-LABEL: define i32 @step_cond_add_pred( ; NO-VP-OUTLOOP-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) #[[ATTR0]] { @@ -931,20 +799,16 @@ for.end: ; IF-EVL-OUTLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; IF-EVL-OUTLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; IF-EVL-OUTLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; IF-EVL-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} -; IF-EVL-OUTLOOP: [[META4]] = !{!"llvm.loop.vectorize.enable", i1 true} +; IF-EVL-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; IF-EVL-OUTLOOP: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} ; IF-EVL-OUTLOOP: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} -; IF-EVL-OUTLOOP: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; IF-EVL-OUTLOOP: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} ;. ; IF-EVL-INLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; IF-EVL-INLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; IF-EVL-INLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; IF-EVL-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} -; IF-EVL-INLOOP: [[META4]] = !{!"llvm.loop.vectorize.enable", i1 true} +; IF-EVL-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; IF-EVL-INLOOP: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} ; IF-EVL-INLOOP: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} -; IF-EVL-INLOOP: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; IF-EVL-INLOOP: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} ;. ; NO-VP-OUTLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; NO-VP-OUTLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll index 22d216e..8cd540c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll @@ -33,20 +33,6 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] -; IF-EVL: [[SCALAR_PH:.*]]: -; IF-EVL-NEXT: br label %[[LOOP:.*]] -; IF-EVL: [[LOOP]]: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ] -; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8 -; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8 -; IF-EVL-NEXT: [[TMP18:%.*]] = sdiv i64 [[TMP16]], [[TMP17]] -; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] -; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; IF-EVL: [[EXIT]]: ; IF-EVL-NEXT: ret void ; @@ -143,20 +129,6 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] -; IF-EVL: [[SCALAR_PH:.*]]: -; IF-EVL-NEXT: br label %[[LOOP:.*]] -; IF-EVL: [[LOOP]]: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ] -; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8 -; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8 -; IF-EVL-NEXT: [[TMP18:%.*]] = udiv i64 [[TMP16]], [[TMP17]] -; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] -; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; IF-EVL: [[EXIT]]: ; IF-EVL-NEXT: ret void ; @@ -252,20 +224,6 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] -; IF-EVL: [[SCALAR_PH:.*]]: -; IF-EVL-NEXT: br label %[[LOOP:.*]] -; IF-EVL: [[LOOP]]: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ] -; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8 -; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8 -; IF-EVL-NEXT: [[TMP18:%.*]] = srem i64 [[TMP16]], [[TMP17]] -; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] -; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; IF-EVL: [[EXIT]]: ; IF-EVL-NEXT: ret void ; @@ -361,20 +319,6 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[EXIT:.*]] -; IF-EVL: [[SCALAR_PH:.*]]: -; IF-EVL-NEXT: br label %[[LOOP:.*]] -; IF-EVL: [[LOOP]]: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ] -; IF-EVL-NEXT: [[A_GEP:%.*]] = getelementptr i64, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_GEP]], align 8 -; IF-EVL-NEXT: [[B_GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_GEP]], align 8 -; IF-EVL-NEXT: [[TMP18:%.*]] = urem i64 [[TMP16]], [[TMP17]] -; IF-EVL-NEXT: [[C_GEP:%.*]] = getelementptr i64, ptr [[C]], i64 [[IV]] -; IF-EVL-NEXT: store i64 [[TMP18]], ptr [[C_GEP]], align 8 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IF-EVL-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; IF-EVL: [[EXIT]]: ; IF-EVL-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index b153328..c7ba826 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -42,19 +42,6 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] -; IF-EVL: [[SCALAR_PH:.*]]: -; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] -; IF-EVL: [[FOR_BODY]]: -; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR1:%.*]] = phi i32 [ 33, %[[SCALAR_PH]] ], [ [[TMP24:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]] -; IF-EVL-NEXT: [[TMP24]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[FOR1]], [[TMP24]] -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS]] -; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: [[FOR_END]]: ; IF-EVL-NEXT: ret void ; @@ -167,23 +154,9 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP23]] ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] -; IF-EVL: [[SCALAR_PH:.*]]: -; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] -; IF-EVL: [[FOR_BODY]]: -; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR1:%.*]] = phi i32 [ 33, %[[SCALAR_PH]] ], [ [[TMP31:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ 22, %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]] -; IF-EVL-NEXT: [[TMP31]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[FOR1]], [[FOR2]] -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS]] -; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3]] ; IF-EVL: [[FOR_END]]: ; IF-EVL-NEXT: ret void ; @@ -316,25 +289,9 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP27]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP27]] ; IF-EVL-NEXT: [[TMP26:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] -; IF-EVL: [[SCALAR_PH:.*]]: -; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] -; IF-EVL: [[FOR_BODY]]: -; IF-EVL-NEXT: [[INDVARS:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR1:%.*]] = phi i32 [ 33, %[[SCALAR_PH]] ], [ [[TMP38:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR2:%.*]] = phi i32 [ 22, %[[SCALAR_PH]] ], [ [[FOR1]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR3:%.*]] = phi i32 [ 11, %[[SCALAR_PH]] ], [ [[FOR2]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]] -; IF-EVL-NEXT: [[TMP38]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[FOR2]], [[FOR3]] -; IF-EVL-NEXT: [[ADD1:%.*]] = add i32 [[ADD]], [[FOR1]] -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS]] -; IF-EVL-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3]] ; IF-EVL: [[FOR_END]]: ; IF-EVL-NEXT: ret void ; @@ -469,7 +426,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP12]], align 4 ; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS]], [[TMP3]] ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4 @@ -495,7 +452,7 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 ; IF-EVL-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[IV]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[TC]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: [[FOR_END]]: ; IF-EVL-NEXT: [[FOR1_LCSSA:%.*]] = phi i32 [ [[FOR1]], %[[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret i32 [[FOR1_LCSSA]] @@ -613,20 +570,9 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: br label %[[FOR_END:.*]] -; IF-EVL: [[SCALAR_PH:.*]]: -; IF-EVL-NEXT: br label %[[FOR_BODY:.*]] -; IF-EVL: [[FOR_BODY]]: -; IF-EVL-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV1_NEXT:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[FOR1:%.*]] = phi i64 [ 33, %[[SCALAR_PH]] ], [ [[TMP14:%.*]], %[[FOR_BODY]] ] -; IF-EVL-NEXT: [[TMP14]] = add i64 [[IV1]], 42 -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[IV1]] -; IF-EVL-NEXT: store i64 [[FOR1]], ptr [[ARRAYIDX]], align 8 -; IF-EVL-NEXT: [[IV1_NEXT]] = add nuw nsw i64 [[IV1]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV1_NEXT]], [[TC]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3]] ; IF-EVL: [[FOR_END]]: ; IF-EVL-NEXT: ret void ; @@ -713,13 +659,11 @@ for.end: ; IF-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; IF-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; IF-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} -; IF-EVL: [[META4]] = !{!"llvm.loop.vectorize.enable", i1 true} +; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} ; IF-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} -; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]} ; IF-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} -; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} -; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} ;. ; NO-VP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; NO-VP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll index df550ec..b9a4e97 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll @@ -30,21 +30,9 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD]] = add nsw i32 [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[ADD_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP15]] ; ; NO-VP-LABEL: @add( ; NO-VP-NEXT: entry: @@ -129,7 +117,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP5]] = mul i32 [[VEC_PHI1]], [[TMP4]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 8 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP5]], [[MUL]] ; IF-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_RND_UP]], [[N_VEC]] @@ -146,7 +134,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[MUL1]] = mul nsw i32 [[TMP0]], [[RDX1]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL1]], [[FOR_BODY1]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]] @@ -231,23 +219,11 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[OR]] = or i32 [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[OR_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP15]] ; ; NO-VP-LABEL: @or( ; NO-VP-NEXT: entry: @@ -327,23 +303,11 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[AND]] = and i32 [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[AND_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP15]] ; ; NO-VP-LABEL: @and( ; NO-VP-NEXT: entry: @@ -423,23 +387,11 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[XOR:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[XOR]] = xor i32 [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[XOR_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP15]] ; ; NO-VP-LABEL: @xor( ; NO-VP-NEXT: entry: @@ -519,24 +471,11 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP17]], [[RDX]] -; IF-EVL-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[SMIN_LCSSA]] +; IF-EVL-NEXT: ret i32 [[RDX_MINMAX]] ; ; NO-VP-LABEL: @smin( ; NO-VP-NEXT: entry: @@ -618,24 +557,11 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[SMAX:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP17]], [[RDX]] -; IF-EVL-NEXT: [[SMAX]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[SMAX_LCSSA:%.*]] = phi i32 [ [[SMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[SMAX_LCSSA]] +; IF-EVL-NEXT: ret i32 [[RDX_MINMAX]] ; ; NO-VP-LABEL: @smax( ; NO-VP-NEXT: entry: @@ -717,24 +643,11 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[TMP17]], [[RDX]] -; IF-EVL-NEXT: [[UMIN]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[UMIN_LCSSA:%.*]] = phi i32 [ [[UMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[UMIN_LCSSA]] +; IF-EVL-NEXT: ret i32 [[RDX_MINMAX]] ; ; NO-VP-LABEL: @umin( ; NO-VP-NEXT: entry: @@ -816,24 +729,11 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[UMAX:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[TMP17]], [[RDX]] -; IF-EVL-NEXT: [[UMAX]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[UMAX_LCSSA:%.*]] = phi i32 [ [[UMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[UMAX_LCSSA]] +; IF-EVL-NEXT: ret i32 [[RDX_MINMAX]] ; ; NO-VP-LABEL: @umax( ; NO-VP-NEXT: entry: @@ -915,23 +815,11 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD]] = fadd reassoc float [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret float [[ADD_LCSSA]] +; IF-EVL-NEXT: ret float [[TMP15]] ; ; NO-VP-LABEL: @fadd( ; NO-VP-NEXT: entry: @@ -1016,7 +904,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP5]] = fmul reassoc float [[VEC_PHI1]], [[TMP4]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 8 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[BIN_RDX:%.*]] = fmul reassoc float [[TMP5]], [[MUL]] ; IF-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_RND_UP]], [[N_VEC]] @@ -1033,7 +921,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MUL1]] = fmul reassoc float [[TMP0]], [[RDX1]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP24:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL1]], [[FOR_BODY1]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MUL_LCSSA]] @@ -1119,24 +1007,11 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP17]], [[RDX]] -; IF-EVL-NEXT: [[MIN]] = select i1 [[CMP]], float [[TMP17]], float [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret float [[MIN_LCSSA]] +; IF-EVL-NEXT: ret float [[RDX_MINMAX_SELECT]] ; ; NO-VP-LABEL: @fmin( ; NO-VP-NEXT: entry: @@ -1220,24 +1095,11 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[TMP17]], [[RDX]] -; IF-EVL-NEXT: [[MAX]] = select i1 [[CMP]], float [[TMP17]], float [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret float [[MAX_LCSSA]] +; IF-EVL-NEXT: ret float [[RDX_MINMAX_SELECT]] ; ; NO-VP-LABEL: @fmax( ; NO-VP-NEXT: entry: @@ -1324,7 +1186,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI2]], <8 x float> [[WIDE_LOAD3]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP3]], <8 x float> [[TMP4]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]]) @@ -1342,7 +1204,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP30:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MIN_LCSSA]] @@ -1432,7 +1294,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI2]], <8 x float> [[WIDE_LOAD3]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP3]], <8 x float> [[TMP4]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]]) @@ -1450,7 +1312,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP32:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MAX_LCSSA]] @@ -1539,25 +1401,11 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP21]], float [[TMP22]], float [[RDX]]) -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret float [[MULADD_LCSSA]] +; IF-EVL-NEXT: ret float [[TMP18]] ; ; NO-VP-LABEL: @fmuladd( ; NO-VP-NEXT: entry: @@ -1644,27 +1492,14 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]] -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP21]], 3 -; IF-EVL-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[ANYOF_LCSSA]] +; IF-EVL-NEXT: ret i32 [[RDX_SELECT]] ; ; NO-VP-LABEL: @anyof_icmp( ; NO-VP-NEXT: entry: @@ -1749,27 +1584,14 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]] -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = fcmp fast olt float [[TMP21]], 3.000000e+00 -; IF-EVL-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[ANYOF_LCSSA]] +; IF-EVL-NEXT: ret i32 [[RDX_SELECT]] ; ; NO-VP-LABEL: @anyof_fcmp( ; NO-VP-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index 7c05f46..0c22a9e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -32,21 +32,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 0 -; IF-EVL-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 1 -; IF-EVL-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] -; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: for.cond.cleanup: ; IF-EVL-NEXT: ret void ; @@ -156,30 +142,12 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP12]]) -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 -; IF-EVL-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[RDX]], [[TMP16]] -; IF-EVL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 -; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[TMP17]] -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 3 -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[ADD2]] = add nsw i32 [[ADD1]], [[TMP18]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]] ; IF-EVL: exit: -; IF-EVL-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[ADD2_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP15]] ; ; NO-VP-LABEL: @load_factor_4_with_gap( ; NO-VP-NEXT: entry: @@ -299,22 +267,9 @@ define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP6]] ; IF-EVL-NEXT: [[VEC_IND_NEXT5]] = add <vscale x 4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP8:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[TMP15:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP15]], i32 0 -; IF-EVL-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP15]], i32 1 -; IF-EVL-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX1]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP15]], i32 3 -; IF-EVL-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[TMP15]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]] ; IF-EVL: exit: ; IF-EVL-NEXT: ret void ; @@ -427,30 +382,12 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP12]]) -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ADD2:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 -; IF-EVL-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[RDX]], [[TMP16]] -; IF-EVL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 -; IF-EVL-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[TMP17]] -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 2 -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[ADD2]] = add nsw i32 [[ADD1]], [[TMP18]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]] ; IF-EVL: exit: -; IF-EVL-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD2]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[ADD2_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP15]] ; ; NO-VP-LABEL: @load_factor_4_with_tail_gap( ; NO-VP-NEXT: entry: @@ -571,22 +508,9 @@ define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP6]] ; IF-EVL-NEXT: [[VEC_IND_NEXT5]] = add <vscale x 4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP8:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[TMP15:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP15]], i32 0 -; IF-EVL-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP15]], i32 1 -; IF-EVL-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX1]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP15]], i32 2 -; IF-EVL-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[TMP15]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]] ; IF-EVL: exit: ; IF-EVL-NEXT: ret void ; @@ -697,33 +621,12 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP16]]) -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[N]], [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 -; IF-EVL-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[RDX]], [[TMP20]] -; IF-EVL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 -; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[TMP21]] -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 2 -; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD1]], [[TMP22]] -; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 3 -; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 -; IF-EVL-NEXT: [[ADD3]] = add nsw i32 [[ADD2]], [[TMP23]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp sgt i64 [[IV_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]] ; IF-EVL: exit: -; IF-EVL-NEXT: [[ADD3_LCSSA:%.*]] = phi i32 [ [[ADD3]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[ADD3_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP19]] ; ; NO-VP-LABEL: @load_factor_4_reverse( ; NO-VP-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll index 00c88a46..1aea6aa 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-iv32.ll @@ -26,18 +26,7 @@ define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV1]] -; IF-EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV1]] -; IF-EVL-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX4]], align 4 -; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT1]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]] ; IF-EVL: for.cond.cleanup: ; IF-EVL-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll index a03b430..e94e64f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll @@ -32,17 +32,6 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[I]] -; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[GEP]], align 8 -; CHECK-NEXT: [[Y:%.*]] = add i64 [[X]], 1 -; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp uge i64 [[I_NEXT]], [[TC]] -; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: @@ -92,17 +81,6 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[I]] -; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[GEP]], align 8 -; CHECK-NEXT: [[Y:%.*]] = add i64 [[X]], 1 -; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[TC]] -; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: @@ -152,17 +130,6 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[I]] -; CHECK-NEXT: [[X:%.*]] = load i64, ptr [[GEP]], align 8 -; CHECK-NEXT: [[Y:%.*]] = add i64 [[X]], 1 -; CHECK-NEXT: store i64 [[Y]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[TC_ADD]] -; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll index 58b4c53..b13c671 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll @@ -30,25 +30,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: -; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[SCALAR_PH:%.*]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_011]] -; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP23]], 0 -; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; IF-EVL: if.then: -; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_011]] -; IF-EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP23]], [[TMP24]] -; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4 -; IF-EVL-NEXT: br label [[FOR_INC]] -; IF-EVL: for.inc: -; IF-EVL-NEXT: [[INC]] = add nuw nsw i64 [[I_011]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]] +; IF-EVL-NEXT: br label [[FOR_INC:%.*]] ; IF-EVL: exit: ; IF-EVL-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll index 6c487ab..dcb7bf4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll @@ -29,21 +29,9 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD]] = fadd float [[TMP17]], [[SUM_07]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret float [[ADD_LCSSA]] +; IF-EVL-NEXT: ret float [[TMP14]] ; ; NO-VP-LABEL: @fadd( ; NO-VP-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll index e14ff7c..7179e7d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll @@ -30,21 +30,9 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD]] = add nsw i32 [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[ADD_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP17]] ; ; NO-VP-LABEL: @add( ; NO-VP-NEXT: entry: @@ -129,7 +117,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[TMP4]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP6:%.*]] = mul <8 x i32> [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP6]]) @@ -147,7 +135,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[MUL]] = mul nsw i32 [[TMP0]], [[RDX]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP6:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY1]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]] @@ -233,24 +221,12 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[OR]] = or i32 [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[OR_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP17]] ; ; NO-VP-LABEL: @or( ; NO-VP-NEXT: entry: @@ -332,24 +308,12 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[AND]] = and i32 [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[AND_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP17]] ; ; NO-VP-LABEL: @and( ; NO-VP-NEXT: entry: @@ -431,24 +395,12 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP14]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[XOR:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[XOR]] = xor i32 [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[XOR_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP17]] ; ; NO-VP-LABEL: @xor( ; NO-VP-NEXT: entry: @@ -532,25 +484,12 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP19]], [[RDX]] -; IF-EVL-NEXT: [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP19]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[SMIN_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP18]] ; ; NO-VP-LABEL: @smin( ; NO-VP-NEXT: entry: @@ -638,25 +577,12 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[SMAX:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp sgt i32 [[TMP19]], [[RDX]] -; IF-EVL-NEXT: [[SMAX]] = select i1 [[CMP_I]], i32 [[TMP19]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[SMAX_LCSSA:%.*]] = phi i32 [ [[SMAX]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[SMAX_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP18]] ; ; NO-VP-LABEL: @smax( ; NO-VP-NEXT: entry: @@ -744,25 +670,12 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[TMP19]], [[RDX]] -; IF-EVL-NEXT: [[UMIN]] = select i1 [[CMP_I]], i32 [[TMP19]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[UMIN_LCSSA:%.*]] = phi i32 [ [[UMIN]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[UMIN_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP18]] ; ; NO-VP-LABEL: @umin( ; NO-VP-NEXT: entry: @@ -850,25 +763,12 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[TMP15]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[UMAX:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[TMP19]], [[RDX]] -; IF-EVL-NEXT: [[UMAX]] = select i1 [[CMP_I]], i32 [[TMP19]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[UMAX_LCSSA:%.*]] = phi i32 [ [[UMAX]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[UMAX_LCSSA]] +; IF-EVL-NEXT: ret i32 [[TMP18]] ; ; NO-VP-LABEL: @umax( ; NO-VP-NEXT: entry: @@ -954,24 +854,12 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP14]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ADD]] = fadd reassoc float [[TMP18]], [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret float [[ADD_LCSSA]] +; IF-EVL-NEXT: ret float [[TMP17]] ; ; NO-VP-LABEL: @fadd( ; NO-VP-NEXT: entry: @@ -1056,7 +944,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP4]] = fmul reassoc <8 x float> [[WIDE_LOAD2]], [[VEC_PHI1]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP6:%.*]] = fmul reassoc <8 x float> [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> [[TMP6]]) @@ -1074,7 +962,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MUL]] = fmul reassoc float [[TMP0]], [[RDX]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP24:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY1]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MUL_LCSSA]] @@ -1162,25 +1050,12 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP15]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP19]], [[RDX]] -; IF-EVL-NEXT: [[MIN]] = select i1 [[CMP]], float [[TMP19]], float [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret float [[MIN_LCSSA]] +; IF-EVL-NEXT: ret float [[TMP18]] ; ; NO-VP-LABEL: @fmin( ; NO-VP-NEXT: entry: @@ -1268,25 +1143,12 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[TMP15]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[TMP19]], [[RDX]] -; IF-EVL-NEXT: [[MAX]] = select i1 [[CMP]], float [[TMP19]], float [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret float [[MAX_LCSSA]] +; IF-EVL-NEXT: ret float [[TMP18]] ; ; NO-VP-LABEL: @fmax( ; NO-VP-NEXT: entry: @@ -1375,7 +1237,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP3]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]]) @@ -1393,7 +1255,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP30:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MIN_LCSSA]] @@ -1483,7 +1345,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP3]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]]) @@ -1501,7 +1363,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP32:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MAX_LCSSA]] @@ -1590,26 +1452,12 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP20:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP17]]) -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP21]], float [[TMP22]], float [[RDX]]) -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret float [[MULADD_LCSSA]] +; IF-EVL-NEXT: ret float [[TMP20]] ; ; NO-VP-LABEL: @fmuladd( ; NO-VP-NEXT: entry: @@ -1696,27 +1544,14 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[INV:%.*]], i32 [[START:%.*]] -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP20]], 3 -; IF-EVL-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[ANYOF_LCSSA]] +; IF-EVL-NEXT: ret i32 [[RDX_SELECT]] ; ; NO-VP-LABEL: @anyof_icmp( ; NO-VP-NEXT: entry: @@ -1801,27 +1636,14 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] ; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[INV:%.*]], i32 [[START:%.*]] -; IF-EVL-NEXT: br label [[FOR_END:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP_I:%.*]] = fcmp fast olt float [[TMP20]], 3.000000e+00 -; IF-EVL-NEXT: [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]] -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; IF-EVL: for.end: -; IF-EVL-NEXT: [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; IF-EVL-NEXT: ret i32 [[ANYOF_LCSSA]] +; IF-EVL-NEXT: ret i32 [[RDX_SELECT]] ; ; NO-VP-LABEL: @anyof_fcmp( ; NO-VP-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 5b9bc50..e70894b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -43,20 +43,7 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[LOOPEND:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[STARTVAL]], [[SCALAR_PH:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[I:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1 -; IF-EVL-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD]] -; IF-EVL-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4 -; IF-EVL-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2]], i64 [[ADD]] -; IF-EVL-NEXT: store i32 [[TMP]], ptr [[GEPS]], align 4 -; IF-EVL-NEXT: [[INC]] = add i32 [[I]], 1 -; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024 -; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND]] ; IF-EVL: loopend: ; IF-EVL-NEXT: ret void ; @@ -179,27 +166,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[LOOPEND:%.*]] -; IF-EVL: scalar.ph: -; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[STARTVAL]], [[SCALAR_PH:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ] -; IF-EVL-NEXT: [[I:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC]] ] -; IF-EVL-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1 -; IF-EVL-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[I]] -; IF-EVL-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4 -; IF-EVL-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP]], 100 -; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; IF-EVL: if.then: -; IF-EVL-NEXT: [[GEPL1:%.*]] = getelementptr inbounds i32, ptr [[PTR1]], i64 [[ADD]] -; IF-EVL-NEXT: [[V:%.*]] = load i32, ptr [[GEPL1]], align 4 -; IF-EVL-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2]], i64 [[ADD]] -; IF-EVL-NEXT: store i32 [[V]], ptr [[GEPS]], align 4 -; IF-EVL-NEXT: br label [[FOR_INC]] -; IF-EVL: for.inc: -; IF-EVL-NEXT: [[INC]] = add i32 [[I]], 1 -; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024 -; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND]] +; IF-EVL-NEXT: br label [[FOR_INC:%.*]] ; IF-EVL: loopend: ; IF-EVL-NEXT: ret void ; @@ -351,22 +318,7 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[LOOP:%.*]] -; IF-EVL: loop: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 1024, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; IF-EVL-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: [[X:%.*]] = load i8, ptr [[GEP_A]], align 1 -; IF-EVL-NEXT: [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i8 [[X]] -; IF-EVL-NEXT: [[Y:%.*]] = load i8, ptr [[GEP_B]], align 1 -; IF-EVL-NEXT: [[GEP_C:%.*]] = getelementptr i8, ptr [[C]], i64 [[IV]] -; IF-EVL-NEXT: store i8 [[Y]], ptr [[GEP_C]], align 1 -; IF-EVL-NEXT: [[GEP_D:%.*]] = getelementptr i8, ptr [[D]], i64 [[IV]] -; IF-EVL-NEXT: store i8 [[Y]], ptr [[GEP_D]], align 1 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1 -; IF-EVL-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 -; IF-EVL-NEXT: br i1 [[CMP_NOT]], label [[EXIT]], label [[LOOP]] ; IF-EVL: exit: ; IF-EVL-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll index b13f97d..e1c62fe 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll @@ -31,19 +31,7 @@ define void @test(ptr %p) { ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[LOOP:%.*]] -; IF-EVL: loop: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 8 -; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 200 -; IF-EVL-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; IF-EVL-NEXT: store i64 [[V]], ptr [[A2]], align 8 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IF-EVL-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; IF-EVL-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; IF-EVL: exit: ; IF-EVL-NEXT: ret void ; @@ -125,19 +113,7 @@ define void @test_may_clobber1(ptr %p) { ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[LOOP:%.*]] -; IF-EVL: loop: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100 -; IF-EVL-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; IF-EVL-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IF-EVL-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; IF-EVL-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; IF-EVL: exit: ; IF-EVL-NEXT: ret void ; @@ -157,19 +133,7 @@ define void @test_may_clobber1(ptr %p) { ; NO-VP-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; NO-VP-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; NO-VP: middle.block: -; NO-VP-NEXT: br label [[EXIT:%.*]] -; NO-VP: scalar.ph: ; NO-VP-NEXT: br label [[LOOP:%.*]] -; NO-VP: loop: -; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; NO-VP-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; NO-VP-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; NO-VP-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 100 -; NO-VP-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; NO-VP-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; NO-VP-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; NO-VP-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; NO-VP: exit: ; NO-VP-NEXT: ret void ; @@ -259,19 +223,7 @@ define void @test_may_clobber3(ptr %p) { ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[LOOP:%.*]] -; IF-EVL: loop: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 10 -; IF-EVL-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; IF-EVL-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IF-EVL-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; IF-EVL-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; IF-EVL: exit: ; IF-EVL-NEXT: ret void ; @@ -291,19 +243,7 @@ define void @test_may_clobber3(ptr %p) { ; NO-VP-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; NO-VP-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; NO-VP: middle.block: -; NO-VP-NEXT: br label [[EXIT:%.*]] -; NO-VP: scalar.ph: ; NO-VP-NEXT: br label [[LOOP:%.*]] -; NO-VP: loop: -; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; NO-VP-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; NO-VP-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; NO-VP-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 10 -; NO-VP-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; NO-VP-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; NO-VP-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; NO-VP-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; NO-VP-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; NO-VP: exit: ; NO-VP-NEXT: ret void ; @@ -347,19 +287,7 @@ define void @trivial_due_max_vscale(ptr %p) { ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[LOOP:%.*]] -; IF-EVL: loop: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 8192 -; IF-EVL-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; IF-EVL-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IF-EVL-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; IF-EVL-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; IF-EVL: exit: ; IF-EVL-NEXT: ret void ; @@ -446,19 +374,7 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[EXIT:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[LOOP:%.*]] -; IF-EVL: loop: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; IF-EVL-NEXT: [[A1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]] -; IF-EVL-NEXT: [[V:%.*]] = load i64, ptr [[A1]], align 32 -; IF-EVL-NEXT: [[OFFSET:%.*]] = add i64 [[IV]], 1024 -; IF-EVL-NEXT: [[A2:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET]] -; IF-EVL-NEXT: store i64 [[V]], ptr [[A2]], align 32 -; IF-EVL-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IF-EVL-NEXT: [[CMP:%.*]] = icmp ne i64 [[IV]], 3001 -; IF-EVL-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; IF-EVL: exit: ; IF-EVL-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll index 0bb7ad0..f804329 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll @@ -38,16 +38,6 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[TMP22:%.*]] = sub nuw nsw i64 1, [[IV1]] -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP22]] -; CHECK-NEXT: store i64 0, ptr [[ARRAYIDX14]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 3 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index 5c89f21..c5319c6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -31,20 +31,6 @@ define void @test_pr98413_zext_removed(ptr %src, ptr noalias %dst, i64 %x) { ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP_SRC]], align 8 -; CHECK-NEXT: [[EXT_L:%.*]] = zext i16 [[L]] to i64 -; CHECK-NEXT: [[AND:%.*]] = and i64 [[X]], [[EXT_L]] -; CHECK-NEXT: [[TRUNC_AND:%.*]] = trunc i64 [[AND]] to i8 -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i8 [[TRUNC_AND]], ptr [[GEP_DST]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 96 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -95,20 +81,6 @@ define void @test_pr98413_sext_removed(ptr %src, ptr noalias %dst, i64 %x) { ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP_SRC]], align 8 -; CHECK-NEXT: [[EXT_L:%.*]] = sext i16 [[L]] to i64 -; CHECK-NEXT: [[AND:%.*]] = and i64 [[X]], [[EXT_L]] -; CHECK-NEXT: [[TRUNC_AND:%.*]] = trunc i64 [[AND]] to i8 -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i8 [[TRUNC_AND]], ptr [[GEP_DST]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 96 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -151,21 +123,6 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[F_039:%.*]] = phi i8 [ 0, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = or i8 23, [[X]] -; CHECK-NEXT: [[EXTRACT_T:%.*]] = trunc i8 [[TMP4]] to i1 -; CHECK-NEXT: br i1 [[EXTRACT_T]], label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: store i8 0, ptr [[DST]], align 1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[ADD]] = add i8 [[F_039]], 1 -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[F_039]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], 8 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -260,23 +217,6 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[T1:%.*]] = trunc i64 [[N]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[T1]], [[T]] -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[X]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IDXPROM]] -; CHECK-NEXT: [[RETVAL:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: store double [[RETVAL]], ptr [[DST]], align 8 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[V]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll index 6efb035..000dc4a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll @@ -22,20 +22,6 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) { ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV1]] -; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[GEP_SRC1]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP11]] to i32 -; CHECK-NEXT: [[MUL16:%.*]] = mul i32 0, [[CONV]] -; CHECK-NEXT: [[SHR35:%.*]] = lshr i32 [[MUL16]], 1 -; CHECK-NEXT: [[CONV36:%.*]] = trunc i32 [[SHR35]] to i8 -; CHECK-NEXT: store i8 [[CONV36]], ptr null, align 1 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], 8 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 9095d6e..bae97e5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -29,16 +29,6 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] -; SCALABLE: [[SCALAR_PH:.*]]: -; SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; SCALABLE: [[FOR_BODY]]: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; SCALABLE: [[FOR_END]]: ; SCALABLE-NEXT: ret void ; @@ -97,16 +87,6 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] -; TF-SCALABLE: [[SCALAR_PH:.*]]: -; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; TF-SCALABLE: [[FOR_BODY]]: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; TF-SCALABLE: [[FOR_END]]: ; TF-SCALABLE-NEXT: ret void ; @@ -292,22 +272,6 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] -; SCALABLE: [[SCALAR_PH:.*]]: -; SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; SCALABLE: [[FOR_BODY]]: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] -; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 -; SCALABLE-NEXT: br i1 [[CMP]], label %[[DO_LOAD:.*]], label %[[LATCH]] -; SCALABLE: [[DO_LOAD]]: -; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 -; SCALABLE-NEXT: br label %[[LATCH]] -; SCALABLE: [[LATCH]]: -; SCALABLE-NEXT: [[PHI:%.*]] = phi i64 [ 0, %[[FOR_BODY]] ], [ [[V]], %[[DO_LOAD]] ] -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; SCALABLE: [[FOR_END]]: ; SCALABLE-NEXT: ret void ; @@ -389,22 +353,6 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] -; TF-SCALABLE: [[SCALAR_PH:.*]]: -; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; TF-SCALABLE: [[FOR_BODY]]: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] -; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 -; TF-SCALABLE-NEXT: br i1 [[CMP]], label %[[DO_LOAD:.*]], label %[[LATCH]] -; TF-SCALABLE: [[DO_LOAD]]: -; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 -; TF-SCALABLE-NEXT: br label %[[LATCH]] -; TF-SCALABLE: [[LATCH]]: -; TF-SCALABLE-NEXT: [[PHI:%.*]] = phi i64 [ 0, %[[FOR_BODY]] ], [ [[V]], %[[DO_LOAD]] ] -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 -; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TF-SCALABLE: [[FOR_END]]: ; TF-SCALABLE-NEXT: ret void ; @@ -451,19 +399,9 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] -; SCALABLE: [[SCALAR_PH:.*]]: -; SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; SCALABLE: [[FOR_BODY]]: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1 -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; SCALABLE: [[FOR_END]]: ; SCALABLE-NEXT: ret void ; @@ -519,19 +457,9 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] -; TF-SCALABLE: [[SCALAR_PH:.*]]: -; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; TF-SCALABLE: [[FOR_BODY]]: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1 -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; TF-SCALABLE: [[FOR_END]]: ; TF-SCALABLE-NEXT: ret void ; @@ -571,19 +499,9 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] -; SCALABLE: [[SCALAR_PH:.*]]: -; SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; SCALABLE: [[FOR_BODY]]: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8 -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; SCALABLE: [[FOR_END]]: ; SCALABLE-NEXT: ret void ; @@ -639,19 +557,9 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] -; TF-SCALABLE: [[SCALAR_PH:.*]]: -; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; TF-SCALABLE: [[FOR_BODY]]: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8 -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; TF-SCALABLE: [[FOR_END]]: ; TF-SCALABLE-NEXT: ret void ; @@ -700,19 +608,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]] ; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] -; SCALABLE: [[SCALAR_PH:.*]]: -; SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; SCALABLE: [[FOR_BODY]]: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8 -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; SCALABLE: [[FOR_END]]: ; SCALABLE-NEXT: ret void ; @@ -781,19 +679,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]] ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] -; TF-SCALABLE: [[SCALAR_PH:.*]]: -; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; TF-SCALABLE: [[FOR_BODY]]: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8 -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; TF-SCALABLE: [[FOR_END]]: ; TF-SCALABLE-NEXT: ret void ; @@ -843,24 +731,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] -; SCALABLE: [[SCALAR_PH:.*]]: -; SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; SCALABLE: [[FOR_BODY]]: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] -; SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 -; SCALABLE-NEXT: br i1 [[CMP]], label %[[DO_STORE:.*]], label %[[LATCH]] -; SCALABLE: [[DO_STORE]]: -; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8 -; SCALABLE-NEXT: br label %[[LATCH]] -; SCALABLE: [[LATCH]]: -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; SCALABLE: [[FOR_END]]: ; SCALABLE-NEXT: ret void ; @@ -939,24 +812,9 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] -; TF-SCALABLE: [[SCALAR_PH:.*]]: -; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; TF-SCALABLE: [[FOR_BODY]]: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] -; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10 -; TF-SCALABLE-NEXT: br i1 [[CMP]], label %[[DO_STORE:.*]], label %[[LATCH]] -; TF-SCALABLE: [[DO_STORE]]: -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8 -; TF-SCALABLE-NEXT: br label %[[LATCH]] -; TF-SCALABLE: [[LATCH]]: -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; TF-SCALABLE: [[FOR_END]]: ; TF-SCALABLE-NEXT: ret void ; @@ -1002,19 +860,9 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] -; SCALABLE: [[SCALAR_PH:.*]]: -; SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; SCALABLE: [[FOR_BODY]]: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1 -; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; SCALABLE: [[FOR_END]]: ; SCALABLE-NEXT: ret void ; @@ -1070,19 +918,9 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] -; TF-SCALABLE: [[SCALAR_PH:.*]]: -; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]] -; TF-SCALABLE: [[FOR_BODY]]: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1 -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 -; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]] ; TF-SCALABLE: [[FOR_END]]: ; TF-SCALABLE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll index 8c67b4c..1676461 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vector-loop-backedge-elimination-with-evl.ll @@ -15,15 +15,6 @@ define void @foo(ptr %arg) #0 { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr [3 x i64], ptr [[ARG]], i64 0, i64 [[IV]] -; CHECK-NEXT: store i64 0, ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 3 -; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -61,18 +52,8 @@ define i32 @test_remove_iv(i32 %start) #0 { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[TMP5]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[START]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED_NEXT]] = xor i32 [[RED]], 3 -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 5 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RED_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll index 649ce60..0a64723 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll @@ -30,21 +30,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 ; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; IF-EVL: for.cond.cleanup: ; IF-EVL-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll index b0f0c39..b106f99 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll @@ -25,11 +25,7 @@ define i32 @foo(ptr nocapture %A) { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 poison ; @@ -76,11 +72,7 @@ define i32 @foo1(ptr nocapture noalias %A, ptr nocapture %PtrPtr) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 poison ; diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll index 1d4cbc3..78c71fd 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll @@ -38,15 +38,6 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 3 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV]], 22 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll index a423f06..02e82b4 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll @@ -91,23 +91,7 @@ define void @test(ptr %p, i40 %a) { ; CHECK: pred.store.continue30: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[SHL:%.*]] = shl i40 [[A]], 24 -; CHECK-NEXT: [[ASHR:%.*]] = ashr i40 [[SHL]], 28 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i40 [[ASHR]] to i32 -; CHECK-NEXT: [[ICMP_EQ:%.*]] = icmp eq i32 [[TRUNC]], 0 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[ICMP_EQ]] to i32 -; CHECK-NEXT: [[ICMP_ULT:%.*]] = icmp ult i32 0, [[ZEXT]] -; CHECK-NEXT: [[OR:%.*]] = or i1 [[ICMP_ULT]], true -; CHECK-NEXT: [[ICMP_SGT:%.*]] = icmp sgt i1 [[OR]], false -; CHECK-NEXT: store i1 [[ICMP_SGT]], ptr [[P]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[IV_NEXT]], 10 -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index 3c788b2..ee84ef2 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -63,19 +63,7 @@ define void @func_21() { ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[LV:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[A_PTR:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[INDVARS_IV]] -; CHECK-NEXT: [[LV]] = load i32, ptr [[A_PTR]], align 4 -; CHECK-NEXT: [[B_PTR:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[SCALAR_RECUR]], ptr [[B_PTR]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll index d40cb6e..cfb1805 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll @@ -66,25 +66,6 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[MUL_IV:%.*]] = mul nsw i64 [[IV]], 4 -; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV]] -; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1 -; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L_1]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[IV_OR:%.*]] = or disjoint i64 [[IV]], 4 -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [8 x i32], ptr @src, i64 0, i64 [[IV_OR]] -; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC]], align 4 -; CHECK-NEXT: store i32 [[L_2]], ptr [[DST]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index 9dd7e9f..f65a9d7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -22,19 +22,7 @@ define void @f1() { ; CHECK-NEXT: store <2 x ptr> <ptr @a, ptr @a>, ptr [[TMP1]], align 8 ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[BB2:%.*]] -; CHECK: bb2: -; CHECK-NEXT: [[C_1_0:%.*]] = phi i16 [ 0, [[SCALAR_PH:%.*]] ], [ [[_TMP9:%.*]], [[BB2]] ] -; CHECK-NEXT: [[_TMP1:%.*]] = zext i16 0 to i64 -; CHECK-NEXT: [[_TMP2:%.*]] = getelementptr [1 x %rec8], ptr @a, i16 0, i64 [[_TMP1]] -; CHECK-NEXT: [[_TMP6:%.*]] = sext i16 [[C_1_0]] to i64 -; CHECK-NEXT: [[_TMP7:%.*]] = getelementptr [2 x ptr], ptr @b, i16 0, i64 [[_TMP6]] -; CHECK-NEXT: store ptr [[_TMP2]], ptr [[_TMP7]], align 8 -; CHECK-NEXT: [[_TMP9]] = add nsw i16 [[C_1_0]], 1 -; CHECK-NEXT: [[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2 -; CHECK-NEXT: br i1 [[_TMP11]], label [[BB2]], label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: ret void ; @@ -102,25 +90,7 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: pred.store.continue8: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] -; CHECK: then.1: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 -; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], true -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_0]], i1 false -; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] -; CHECK: then.2: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] -; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -195,25 +165,7 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: pred.store.continue8: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] -; CHECK: then.1: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 -; CHECK-NEXT: [[OR:%.*]] = or i1 true, [[CMP]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false -; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] -; CHECK: then.2: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] -; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -289,25 +241,7 @@ define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK: pred.store.continue8: ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] -; CHECK: then.1: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 -; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], false -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false -; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] -; CHECK: then.2: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] -; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -341,6 +275,23 @@ exit: define void @redundant_and_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-LABEL: @redundant_and_2( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C_0:%.*]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK: then.1: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 +; CHECK-NEXT: [[OR:%.*]] = and i1 false, [[CMP]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false +; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] +; CHECK: then.2: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[IV]] +; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_HEADER]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll index ee88abb..e0dd376 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll @@ -92,24 +92,8 @@ define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[TMP11]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOPS:.*]] -; CHECK: [[LOOPS]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOPS]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOPS]] ] -; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP14]], [[EXT_0]] -; CHECK-NEXT: [[CONV_1:%.*]] = zext i32 [[SHR]] to i64 -; CHECK-NEXT: [[RED_NEXT_V:%.*]] = select i1 [[C]], i64 [[AND]], i64 [[CONV_1]] -; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED_NEXT_V]], [[RED]] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOPS]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOPS]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[TMP13]] ; entry: %ext.0 = sext i8 0 to i32 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 0ba885d..9453ad7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -696,19 +696,9 @@ define i64 @live_in_known_1_via_scev() { ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]]) -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ 3, [[SCALAR_PH]] ], [ [[RED_MUL:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED_MUL]] = mul nsw i64 [[RED]], [[P_EXT]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[RED_MUL]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 [[TMP3]] ; entry: %sel = select i1 false, i32 3, i32 0 @@ -753,22 +743,9 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) { ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[TMP3]]) -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT_I_I_I:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ 1, [[SCALAR_PH]] ], [ [[RED_MUL:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[NOT_X:%.*]] = xor i1 [[X]], true -; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[NOT_X]] to i64 -; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[Y]], [[EXT]] -; CHECK-NEXT: [[RED_MUL]] = mul i64 [[SHL]], [[RED]] -; CHECK-NEXT: [[IV_NEXT_I_I_I]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[RED_MUL_LCSSA:%.*]] = phi i64 [ [[RED_MUL]], [[LOOP]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RED_MUL_LCSSA]] +; CHECK-NEXT: ret i64 [[TMP4]] ; entry: br label %loop @@ -808,20 +785,9 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 { ; CHECK: middle.block: ; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]]) ; CHECK-NEXT: [[TMP21:%.*]] = zext i1 [[TMP20]] to i32 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP1:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[VEC_EPILOG_PH:%.*]] ], [ [[INC:%.*]], [[LOOP1]] ] -; CHECK-NEXT: [[OR13:%.*]] = phi i32 [ 0, [[VEC_EPILOG_PH]] ], [ [[OR:%.*]], [[LOOP1]] ] -; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR13]], 1 -; CHECK-NEXT: [[OR]] = or i32 [[AND]], [[CONV]] -; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 16 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP1]] ; CHECK: exit: -; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP1]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[OR_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP21]] ; entry: %conv = zext i1 %cmp to i32 diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll index 3d07eca..249efe1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll @@ -39,30 +39,9 @@ define i1 @fn(ptr %nno) #0 { ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP12]]) -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY20:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 10, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC35:%.*]] ] -; CHECK-NEXT: [[SUM_01:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[SUM_1:%.*]], [[FOR_INC35]] ] -; CHECK-NEXT: [[REM4:%.*]] = and i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[CMP21:%.*]] = icmp eq i64 [[REM4]], 0 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[NNO]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: br i1 [[CMP21]], label [[IF_THEN22:%.*]], label [[FOR_INC35]] -; CHECK: if.then: -; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[TMP15]], 1 -; CHECK-NEXT: [[REM27:%.*]] = urem i32 [[MUL]], 10 -; CHECK-NEXT: br label [[FOR_INC35]] -; CHECK: loop.latch: -; CHECK-NEXT: [[REM27_PN:%.*]] = phi i32 [ [[REM27]], [[IF_THEN22]] ], [ [[TMP15]], [[FOR_BODY20]] ] -; CHECK-NEXT: [[SUM_1]] = or i32 [[REM27_PN]], [[SUM_01]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[EXIT]], label [[FOR_BODY20]] +; CHECK-NEXT: br label [[FOR_INC35:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_INC35]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[CMP41:%.*]] = icmp eq i32 [[SUM_1_LCSSA]], 0 +; CHECK-NEXT: [[CMP41:%.*]] = icmp eq i32 [[TMP14]], 0 ; CHECK-NEXT: ret i1 [[CMP41]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index d0c311eb..cc84fab 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -495,18 +495,7 @@ define void @test_first_order_recurrence_tried_to_scalarized(ptr %dst, i1 %c, i3 ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ 4, [[SCALAR_PH]] ], [ [[IV]], [[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[FOR]] -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[IV]] -; CHECK-NEXT: store i32 [[SUB]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 9528510..2f33e11 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -45,7 +45,8 @@ define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; AVX512-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AVX512: middle.block: ; AVX512-NEXT: br label [[FOR_END:%.*]] -; AVX512: scalar.ph: +; AVX512: for.end: +; AVX512-NEXT: ret void ; ; FVW2-LABEL: @foo1( ; FVW2-NEXT: entry: @@ -70,7 +71,8 @@ define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FVW2: middle.block: ; FVW2-NEXT: br label [[FOR_END:%.*]] -; FVW2: scalar.ph: +; FVW2: for.end: +; FVW2-NEXT: ret void ; entry: br label %for.body @@ -137,7 +139,8 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; AVX512: middle.block: ; AVX512-NEXT: br label [[FOR_END:%.*]] -; AVX512: scalar.ph: +; AVX512: for.end: +; AVX512-NEXT: ret void ; ; FVW2-LABEL: @foo2( ; FVW2-NEXT: entry: @@ -182,7 +185,8 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FVW2: middle.block: ; FVW2-NEXT: br label [[FOR_END:%.*]] -; FVW2: scalar.ph: +; FVW2: for.end: +; FVW2-NEXT: ret void ; entry: br label %for.body @@ -250,7 +254,8 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AVX512: middle.block: ; AVX512-NEXT: br label [[FOR_END:%.*]] -; AVX512: scalar.ph: +; AVX512: for.end: +; AVX512-NEXT: ret void ; ; FVW2-LABEL: @foo3( ; FVW2-NEXT: entry: @@ -295,7 +300,8 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FVW2: middle.block: ; FVW2-NEXT: br label [[FOR_END:%.*]] -; FVW2: scalar.ph: +; FVW2: for.end: +; FVW2-NEXT: ret void ; entry: br label %for.body @@ -350,7 +356,8 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; AVX512: middle.block: ; AVX512-NEXT: br label [[FOR_END:%.*]] -; AVX512: scalar.ph: +; AVX512: for.end: +; AVX512-NEXT: ret void ; ; FVW2-LABEL: @foo2_addrspace( ; FVW2-NEXT: entry: @@ -395,7 +402,8 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FVW2: middle.block: ; FVW2-NEXT: br label [[FOR_END:%.*]] -; FVW2: scalar.ph: +; FVW2: for.end: +; FVW2-NEXT: ret void ; entry: br label %for.body @@ -449,7 +457,8 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AVX512: middle.block: ; AVX512-NEXT: br label [[FOR_END:%.*]] -; AVX512: scalar.ph: +; AVX512: for.end: +; AVX512-NEXT: ret void ; ; FVW2-LABEL: @foo2_addrspace2( ; FVW2-NEXT: entry: @@ -494,7 +503,8 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FVW2: middle.block: ; FVW2-NEXT: br label [[FOR_END:%.*]] -; FVW2: scalar.ph: +; FVW2: for.end: +; FVW2-NEXT: ret void ; entry: br label %for.body @@ -548,7 +558,8 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; AVX512-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; AVX512: middle.block: ; AVX512-NEXT: br label [[FOR_END:%.*]] -; AVX512: scalar.ph: +; AVX512: for.end: +; AVX512-NEXT: ret void ; ; FVW2-LABEL: @foo2_addrspace3( ; FVW2-NEXT: entry: @@ -593,7 +604,8 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; FVW2: middle.block: ; FVW2-NEXT: br label [[FOR_END:%.*]] -; FVW2: scalar.ph: +; FVW2: for.end: +; FVW2-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index b2d587c..877fcd4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -90,29 +90,9 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE: middle.block: ; SSE-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[PREDPHI3]], [[PREDPHI]] ; SSE-NEXT: [[TMP11:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX]]) -; SSE-NEXT: br label [[DONE:%.*]] -; SSE: scalar.ph: -; SSE-NEXT: br label [[LOOP:%.*]] -; SSE: loop: -; SSE-NEXT: [[I:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] -; SSE-NEXT: [[TOT:%.*]] = phi double [ 0.000000e+00, [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] -; SSE-NEXT: [[ADDR:%.*]] = getelementptr double, ptr [[ARR]], i32 [[I]] -; SSE-NEXT: [[NEXTVAL:%.*]] = load double, ptr [[ADDR]], align 8 -; SSE-NEXT: [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 -; SSE-NEXT: br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] -; SSE: do.add: -; SSE-NEXT: [[TOT_NEW:%.*]] = fadd fast double [[TOT]], [[NEXTVAL]] -; SSE-NEXT: br label [[NEXT_ITER]] -; SSE: no.add: -; SSE-NEXT: br label [[NEXT_ITER]] -; SSE: next.iter: -; SSE-NEXT: [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] -; SSE-NEXT: [[I_NEXT]] = add i32 [[I]], 1 -; SSE-NEXT: [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 -; SSE-NEXT: br i1 [[AGAIN]], label [[LOOP]], label [[DONE]] +; SSE-NEXT: br label [[NEXT_ITER:%.*]] ; SSE: done: -; SSE-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] -; SSE-NEXT: ret double [[TOT_NEXT_LCSSA]] +; SSE-NEXT: ret double [[TMP11]] ; ; AVX-LABEL: @sumIfVector( ; AVX-NEXT: entry: @@ -153,29 +133,9 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: [[BIN_RDX10:%.*]] = fadd fast <4 x double> [[PREDPHI8]], [[BIN_RDX]] ; AVX-NEXT: [[BIN_RDX11:%.*]] = fadd fast <4 x double> [[PREDPHI9]], [[BIN_RDX10]] ; AVX-NEXT: [[TMP21:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[BIN_RDX11]]) -; AVX-NEXT: br label [[DONE:%.*]] -; AVX: scalar.ph: -; AVX-NEXT: br label [[LOOP:%.*]] -; AVX: loop: -; AVX-NEXT: [[I:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] -; AVX-NEXT: [[TOT:%.*]] = phi double [ 0.000000e+00, [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] -; AVX-NEXT: [[ADDR:%.*]] = getelementptr double, ptr [[ARR]], i32 [[I]] -; AVX-NEXT: [[NEXTVAL:%.*]] = load double, ptr [[ADDR]], align 8 -; AVX-NEXT: [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 -; AVX-NEXT: br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] -; AVX: do.add: -; AVX-NEXT: [[TOT_NEW:%.*]] = fadd fast double [[TOT]], [[NEXTVAL]] -; AVX-NEXT: br label [[NEXT_ITER]] -; AVX: no.add: -; AVX-NEXT: br label [[NEXT_ITER]] -; AVX: next.iter: -; AVX-NEXT: [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] -; AVX-NEXT: [[I_NEXT]] = add i32 [[I]], 1 -; AVX-NEXT: [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 -; AVX-NEXT: br i1 [[AGAIN]], label [[LOOP]], label [[DONE]] +; AVX-NEXT: br label [[NEXT_ITER:%.*]] ; AVX: done: -; AVX-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] -; AVX-NEXT: ret double [[TOT_NEXT_LCSSA]] +; AVX-NEXT: ret double [[TMP21]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 27eef01..a19b294 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -409,21 +409,9 @@ define i16 @iv_and_step_trunc() { ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[REC_NEXT]] = mul i16 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i16 [[REC_LCSSA]] +; CHECK-NEXT: ret i16 [[VECTOR_RECUR_EXTRACT_FOR_PHI]] ; entry: br label %loop @@ -612,16 +600,7 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-NEXT: store i32 [[IV_TRUNC]], ptr [[DST]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 91c7e7a3..2f96278 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -38,36 +38,6 @@ define void @test_free_instructions_feeding_geps_for_interleave_groups(ptr noali ; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[P_INVAR]], align 4 -; CHECK-NEXT: [[IV_MUL:%.*]] = shl i64 [[IV]], 2 -; CHECK-NEXT: [[GEP_DST_19:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[IV_MUL]] -; CHECK-NEXT: store float [[L_0]], ptr [[GEP_DST_19]], align 4 -; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[P_INVAR]], align 4 -; CHECK-NEXT: [[ADD_1:%.*]] = or disjoint i64 [[IV_MUL]], 1 -; CHECK-NEXT: [[GEP_DST_119:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_1]] -; CHECK-NEXT: store float [[L_1]], ptr [[GEP_DST_119]], align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = or disjoint i64 [[IV_MUL]], 2 -; CHECK-NEXT: [[GEP_DST_129:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_2]] -; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_129]], align 4 -; CHECK-NEXT: [[ADD_3:%.*]] = or disjoint i64 [[IV_MUL]], 3 -; CHECK-NEXT: [[GEP_DST_140:%.*]] = getelementptr float, ptr [[DST_1]], i64 [[ADD_3]] -; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_140]], align 4 -; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[P_INVAR]], align 4 -; CHECK-NEXT: [[GEP_DST_247:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[IV_MUL]] -; CHECK-NEXT: store float [[L_2]], ptr [[GEP_DST_247]], align 4 -; CHECK-NEXT: [[GEP_DST_255:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_1]] -; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_255]], align 4 -; CHECK-NEXT: [[GEP_DST_265:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_2]] -; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_265]], align 4 -; CHECK-NEXT: [[GEP_DST_276:%.*]] = getelementptr float, ptr [[DST_2]], i64 [[ADD_3]] -; CHECK-NEXT: store float 0.000000e+00, ptr [[GEP_DST_276]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -504,17 +474,6 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]], i32 1 -; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_1]], align 8 -; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { double, i64 }, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_0]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -616,17 +575,6 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i64 [[IV]], ptr [[GEP_0]], align 8 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr { i64, double }, ptr [[DST]], i64 [[IV]], i32 1 -; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll index 228bc80..e2329fe 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll @@ -34,13 +34,9 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { ; SSE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; SSE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SSE: middle.block: -; SSE-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; SSE: scalar.ph: ; SSE-NEXT: br label [[FOR_BODY:%.*]] ; SSE: for.cond.cleanup: ; SSE-NEXT: ret void -; SSE: for.body: -; SSE-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ; AVX1-LABEL: @foo( ; AVX1-NEXT: entry: @@ -88,13 +84,9 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { ; AVX1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; AVX1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AVX1: middle.block: -; AVX1-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; AVX1: scalar.ph: ; AVX1-NEXT: br label [[FOR_BODY:%.*]] ; AVX1: for.cond.cleanup: ; AVX1-NEXT: ret void -; AVX1: for.body: -; AVX1-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ; AVX2-LABEL: @foo( ; AVX2-NEXT: entry: @@ -142,13 +134,9 @@ define void @foo(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) { ; AVX2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; AVX2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AVX2: middle.block: -; AVX2-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; AVX2: scalar.ph: ; AVX2-NEXT: br label [[FOR_BODY:%.*]] ; AVX2: for.cond.cleanup: ; AVX2-NEXT: ret void -; AVX2: for.body: -; AVX2-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ; ATOM-LABEL: @foo( ; ATOM-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index 5853e91..5d40e6a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -409,16 +409,6 @@ define void @test_store_of_final_reduction_value(i64 %x, ptr %dst) { ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[TMP0]]) ; CHECK-NEXT: store i64 [[TMP1]], ptr [[DST]], align 8 ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV4:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED_NEXT]] = mul i64 [[RED]], [[X]] -; CHECK-NEXT: store i64 [[RED_NEXT]], ptr [[DST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV4]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV4]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 9e0ef73..2a8c698 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -63,27 +63,9 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[BIN_RDX13:%.*]] = add <4 x i32> [[TMP18]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX14:%.*]] = add <4 x i32> [[TMP19]], [[BIN_RDX13]] ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX14]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EARLYCND:%.*]] = icmp slt i64 [[IV]], [[LEN]] -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP21]] ; entry: %alloca = alloca [4096 x i32] @@ -212,28 +194,9 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP74]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP75]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP77]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP77]] ; entry: %alloca = alloca [4096 x i32] @@ -390,27 +353,9 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP98]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP99]], [[BIN_RDX7]] ; CHECK-NEXT: [[TMP101:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ALLOCA]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP101]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP101]] ; entry: %alloca = alloca [4096 x i32] @@ -659,28 +604,9 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP146]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP147]], [[BIN_RDX37]] ; CHECK-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR_I16P:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR_I16P]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP149]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP149]] ; entry: %alloca = alloca [4096 x i32] @@ -974,28 +900,9 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP74]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP75]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1024, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP77]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP77]] ; entry: %alloca = alloca [4096 x i32] @@ -1216,28 +1123,9 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP114]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP115]], [[BIN_RDX7]] ; CHECK-NEXT: [[TMP117:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4093 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP117]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP117]] ; entry: %alloca = alloca [4096 x i32] @@ -1366,28 +1254,9 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP74]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP75]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP77]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP77]] ; entry: %alloca = alloca [1024 x i32] @@ -1516,28 +1385,9 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP74]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP75]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP77]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP77]] ; entry: %alloca = alloca [4095 x i32] @@ -1666,28 +1516,9 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP74]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP75]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP77]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP77]] ; entry: %alloca = alloca [16383 x i8] @@ -1985,28 +1816,9 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP74]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP75]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCATION]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP77]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP77]] ; entry: %allocation = call nonnull ptr @my_alloc(i32 16384) @@ -2136,28 +1948,9 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP74]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP75]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCATION]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP77]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP77]] ; entry: %allocation = call nonnull ptr @my_array_alloc(i32 4096, i32 4) @@ -2297,28 +2090,9 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[TMP74]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[TMP75]], [[BIN_RDX10]] ; CHECK-NEXT: [[TMP77:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[ALLOCATION]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP77]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP77]] ; entry: %allocation = call nonnull ptr @my_alloc(i32 16384) diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll index d0991a5..e23f8a9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll @@ -1199,19 +1199,7 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; O1VEC2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; O1VEC2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; O1VEC2: middle.block: -; O1VEC2-NEXT: br label [[FOR_END:%.*]] -; O1VEC2: scalar.ph: ; O1VEC2-NEXT: br label [[FOR_BODY:%.*]] -; O1VEC2: for.body: -; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]] -; O1VEC2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]] -; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]] -; O1VEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 -; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] ; O1VEC2: for.end: ; O1VEC2-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 ; O1VEC2-NEXT: ret i32 [[TMP11]] @@ -1239,19 +1227,7 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b ; OzVEC2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; OzVEC2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; OzVEC2: middle.block: -; OzVEC2-NEXT: br label [[FOR_END:%.*]] -; OzVEC2: scalar.ph: ; OzVEC2-NEXT: br label [[FOR_BODY:%.*]] -; OzVEC2: for.body: -; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]] -; OzVEC2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]] -; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]] -; OzVEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 -; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 -; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] ; OzVEC2: for.end: ; OzVEC2-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 ; OzVEC2-NEXT: ret i32 [[TMP11]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index fc37e5f..e1140b5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -32,18 +32,6 @@ define i32 @foo_optsize() #0 { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP7]], 0 -; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 -; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret i32 0 ; @@ -69,18 +57,6 @@ define i32 @foo_optsize() #0 { ; AUTOVF-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AUTOVF: [[MIDDLE_BLOCK]]: ; AUTOVF-NEXT: br label %[[FOR_END:.*]] -; AUTOVF: [[SCALAR_PH:.*]]: -; AUTOVF-NEXT: br label %[[FOR_BODY:.*]] -; AUTOVF: [[FOR_BODY]]: -; AUTOVF-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; AUTOVF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] -; AUTOVF-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; AUTOVF-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP7]], 0 -; AUTOVF-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 -; AUTOVF-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 -; AUTOVF-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 -; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; AUTOVF-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; AUTOVF: [[FOR_END]]: ; AUTOVF-NEXT: ret i32 0 ; @@ -128,18 +104,6 @@ define i32 @foo_minsize() #1 { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] -; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP7]], 0 -; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 -; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret i32 0 ; @@ -165,18 +129,6 @@ define i32 @foo_minsize() #1 { ; AUTOVF-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; AUTOVF: [[MIDDLE_BLOCK]]: ; AUTOVF-NEXT: br label %[[FOR_END:.*]] -; AUTOVF: [[SCALAR_PH:.*]]: -; AUTOVF-NEXT: br label %[[FOR_BODY:.*]] -; AUTOVF: [[FOR_BODY]]: -; AUTOVF-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; AUTOVF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] -; AUTOVF-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; AUTOVF-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP7]], 0 -; AUTOVF-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 -; AUTOVF-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 -; AUTOVF-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 -; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 -; AUTOVF-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; AUTOVF: [[FOR_END]]: ; AUTOVF-NEXT: ret i32 0 ; @@ -226,18 +178,6 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[MUL]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_07]] -; CHECK-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]] ; CHECK: [[FOR_END_LOOPEXIT]]: ; CHECK-NEXT: ret void ; @@ -263,18 +203,6 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon ; AUTOVF-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AUTOVF: [[MIDDLE_BLOCK]]: ; AUTOVF-NEXT: br label %[[FOR_END_LOOPEXIT:.*]] -; AUTOVF: [[SCALAR_PH:.*]]: -; AUTOVF-NEXT: br label %[[FOR_BODY:.*]] -; AUTOVF: [[FOR_BODY]]: -; AUTOVF-NEXT: [[I_07:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; AUTOVF-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_07]], [[K]] -; AUTOVF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[MUL]] -; AUTOVF-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; AUTOVF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_07]] -; AUTOVF-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4 -; AUTOVF-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 -; AUTOVF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 256 -; AUTOVF-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]] ; AUTOVF: [[FOR_END_LOOPEXIT]]: ; AUTOVF-NEXT: ret void ; @@ -431,14 +359,6 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i64 -72 -; CHECK-NEXT: store ptr null, ptr [[PTR_IV]], align 8 -; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -475,14 +395,6 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; AUTOVF-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; AUTOVF: [[MIDDLE_BLOCK]]: ; AUTOVF-NEXT: br label %[[EXIT:.*]] -; AUTOVF: [[SCALAR_PH:.*]]: -; AUTOVF-NEXT: br label %[[LOOP:.*]] -; AUTOVF: [[LOOP]]: -; AUTOVF-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; AUTOVF-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i64 -72 -; AUTOVF-NEXT: store ptr null, ptr [[PTR_IV]], align 8 -; AUTOVF-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] -; AUTOVF-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; AUTOVF: [[EXIT]]: ; AUTOVF-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll index 65f8487..5d76dfb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll @@ -108,11 +108,7 @@ define void @parallel_loop(ptr nocapture %a, ptr nocapture %b) nounwind uwtable ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll index 62eacf6..619693a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll @@ -104,23 +104,8 @@ define i8 @pr141968(i1 %cond, i8 %v) { ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: br i1 [[COND]], label %[[LOOP_LATCH]], label %[[COND_FALSE:.*]] -; CHECK: [[COND_FALSE]]: -; CHECK-NEXT: [[SDIV:%.*]] = sdiv i16 [[SEXT]], [[ZEXT_TRUE]] -; CHECK-NEXT: [[SDIV_TRUNC:%.*]] = trunc i16 [[SDIV]] to i8 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[RET:%.*]] = phi i8 [ [[SDIV_TRUNC]], %[[COND_FALSE]] ], [ 0, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i8 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RET_LCSSA:%.*]] = phi i8 [ [[RET]], %[[LOOP_LATCH]] ], [ [[PREDPHI]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i8 [[RET_LCSSA]] +; CHECK-NEXT: ret i8 [[PREDPHI]] ; entry: %zext.true = zext i1 true to i16 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll index 972164f..47db49c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr34438.ll @@ -16,26 +16,13 @@ define void @small_tc(ptr noalias nocapture %A, ptr noalias nocapture readonly % ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[B:%.*]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x float>, ptr [[A:%.*]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <8 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: store <8 x float> [[TMP4]], ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <8 x float> [[TMP4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP6]], [[TMP7]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 8 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index 0098065..e7f56a4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -43,26 +43,8 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: br i1 [[CMP_1]], label %[[LOOP_LATCH]], label %[[THEN:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[M:%.*]] = phi i32 [ [[L]], %[[THEN]] ], [ 0, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i32 [[M]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[CMP_2]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[GEP_LCSSA:%.*]] = phi ptr [ [[GEP_SRC]], %[[LOOP_LATCH]] ], [ [[TMP2]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret ptr [[GEP_LCSSA]] +; CHECK-NEXT: ret ptr [[TMP2]] ; entry: br label %loop.header diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 3922796..3616379 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -39,23 +39,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[BB6:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 99, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 -; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]] -; CHECK: bb18: -; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 -; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]] -; CHECK-NEXT: store i64 1, ptr [[GETELEMENTPTR19]], align 8 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90 -; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: bb6: ; CHECK-NEXT: ret void ; @@ -99,6 +83,4 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" } ; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 24} -; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1} -; CHECK: [[PROF6]] = !{!"branch_weights", i32 1, i32 95} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll index 2bc3a97..f066000 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -71,23 +71,11 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) { ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX]]) -; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ 0.000000e+00, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ADDRESS:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[IDX]] -; CHECK-NEXT: [[VALUE:%.*]] = load float, ptr [[ADDRESS]], align 4 -; CHECK-NEXT: [[SUM_INC]] = fadd fast float [[SUM]], [[VALUE]] -; CHECK-NEXT: [[IDX_INC]] = add i32 [[IDX]], 1 -; CHECK-NEXT: [[BE_COND:%.*]] = icmp ne i32 [[IDX_INC]], 4096 -; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LOOP_EXIT_LOOPEXIT]] ; CHECK: loop.exit.loopexit: -; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_EXIT]] ; CHECK: loop.exit: -; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP9]], [[LOOP]] ] ; CHECK-NEXT: ret float [[SUM_LCSSA]] ; entry: @@ -134,23 +122,11 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) { ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <4 x float> [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]]) -; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ -0.000000e+00, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ADDRESS:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[IDX]] -; CHECK-NEXT: [[VALUE:%.*]] = load float, ptr [[ADDRESS]], align 4 -; CHECK-NEXT: [[SUM_INC]] = fadd reassoc float [[SUM]], [[VALUE]] -; CHECK-NEXT: [[IDX_INC]] = add i32 [[IDX]], 1 -; CHECK-NEXT: [[BE_COND:%.*]] = icmp ne i32 [[IDX_INC]], 4096 -; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LOOP_EXIT_LOOPEXIT]] ; CHECK: loop.exit.loopexit: -; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_EXIT]] ; CHECK: loop.exit: -; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP9]], [[LOOP]] ] ; CHECK-NEXT: ret float [[SUM_LCSSA]] ; entry: @@ -197,23 +173,11 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array) ; CHECK: middle.block: ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc contract <4 x float> [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = call reassoc contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]]) -; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_INC:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[SUM_INC:%.*]], [[LOOP]] ], [ -0.000000e+00, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ADDRESS:%.*]] = getelementptr float, ptr [[ARRAY]], i32 [[IDX]] -; CHECK-NEXT: [[VALUE:%.*]] = load float, ptr [[ADDRESS]], align 4 -; CHECK-NEXT: [[SUM_INC]] = fadd reassoc contract float [[SUM]], [[VALUE]] -; CHECK-NEXT: [[IDX_INC]] = add i32 [[IDX]], 1 -; CHECK-NEXT: [[BE_COND:%.*]] = icmp ne i32 [[IDX_INC]], 4096 -; CHECK-NEXT: br i1 [[BE_COND]], label [[LOOP]], label [[LOOP_EXIT_LOOPEXIT]] ; CHECK: loop.exit.loopexit: -; CHECK-NEXT: [[SUM_INC_LCSSA:%.*]] = phi float [ [[SUM_INC]], [[LOOP]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_EXIT]] ; CHECK: loop.exit: -; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[SUM_INC_LCSSA]], [[LOOP_EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi float [ -0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP9]], [[LOOP]] ] ; CHECK-NEXT: ret float [[SUM_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll index 90f3df5..70b05ac 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll @@ -50,23 +50,6 @@ define void @smax_call_uniform(ptr %dst, i64 %x) { ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[REM1:%.*]] = urem i64 [[MUL]], [[X]] -; CHECK-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[REM1]], i64 0) -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[LOOP_HEADER]] ], [ [[SMAX]], %[[ELSE]] ] -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV_NEXT]] -; CHECK-NEXT: store i64 0, ptr [[GEP1]], align 8 -; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT1]], 1024 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll index b713a39..272b62b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scev-checks-unprofitable.ll @@ -33,8 +33,6 @@ define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT_1_LOOPEXIT1:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_3:.*]] ; CHECK: [[LOOP_2_PREHEADER]]: ; CHECK-NEXT: br label %[[LOOP_2:.*]] ; CHECK: [[LOOP_2]]: @@ -48,13 +46,6 @@ define void @value_defined_in_loop1_used_for_trip_counts(i32 %start, i1 %c, ptr ; CHECK-NEXT: store i16 0, ptr [[GEP_DST]], align 2 ; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_2]], [[IV_1_LCSSA]] ; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_2]], label %[[EXIT_1_LOOPEXIT:.*]] -; CHECK: [[LOOP_3]]: -; CHECK-NEXT: [[IV_4:%.*]] = phi i64 [ [[IV_4_NEXT:%.*]], %[[LOOP_3]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP_DST_2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_4]] -; CHECK-NEXT: store i8 0, ptr [[GEP_DST_2]], align 1 -; CHECK-NEXT: [[IV_4_NEXT]] = add i64 [[IV_4]], 1 -; CHECK-NEXT: [[EC_3:%.*]] = icmp ult i64 [[IV_4_NEXT]], [[IV_1_LCSSA]] -; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP_3]], label %[[EXIT_1_LOOPEXIT1]] ; CHECK: [[EXIT_1_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT_1:.*]] ; CHECK: [[EXIT_1_LOOPEXIT1]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index f877e1b..e99ffda 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -39,12 +39,8 @@ define void @example1() optsize { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[TMP7:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[TMP6:%.*]] ; CHECK: 6: -; CHECK-NEXT: br i1 poison, label [[TMP7]], label [[TMP6]] -; CHECK: 7: ; CHECK-NEXT: ret void ; br label %1 @@ -123,8 +119,6 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[DOT_PREHEADER_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[DOTLR_PH5:%.*]] ; CHECK: ..preheader_crit_edge: ; CHECK-NEXT: [[PHITMP:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: br label [[DOTPREHEADER]] @@ -134,7 +128,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: br i1 [[TMP16]], label [[DOT_CRIT_EDGE:%.*]], label [[DOTLR_PH_PREHEADER:%.*]] ; CHECK: .lr.ph.preheader: ; CHECK-NEXT: br label [[VECTOR_PH8:%.*]] -; CHECK: vector.ph8: +; CHECK: vector.ph7: ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[N_RND_UP10:%.*]] = add nuw nsw i64 [[TMP17]], 3 ; CHECK-NEXT: [[N_VEC12:%.*]] = and i64 [[N_RND_UP10]], 8589934588 @@ -142,7 +136,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_114]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT19]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY13:%.*]] -; CHECK: vector.body15: +; CHECK: vector.body14: ; CHECK-NEXT: [[INDEX16:%.*]] = phi i64 [ 0, [[VECTOR_PH8]] ], [ [[INDEX_NEXT29:%.*]], [[PRED_STORE_CONTINUE26:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[I_0_LCSSA]], [[INDEX16]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT17:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX16]], i64 0 @@ -151,7 +145,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT20]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP18]], i64 0 ; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]] -; CHECK: pred.store.if19: +; CHECK: pred.store.if18: ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr @b, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr @c, i64 [[OFFSET_IDX]] @@ -160,10 +154,10 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP23]], [[TMP21]] ; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]] -; CHECK: pred.store.continue20: +; CHECK: pred.store.continue19: ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP18]], i64 1 ; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] -; CHECK: pred.store.if21: +; CHECK: pred.store.if20: ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr @b, i64 [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 @@ -173,10 +167,10 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP31]], [[TMP29]] ; CHECK-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]] -; CHECK: pred.store.continue22: +; CHECK: pred.store.continue21: ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP18]], i64 2 ; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] -; CHECK: pred.store.if23: +; CHECK: pred.store.if22: ; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr @b, i64 [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 @@ -186,10 +180,10 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP39]], [[TMP37]] ; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]] -; CHECK: pred.store.continue24: +; CHECK: pred.store.continue23: ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[TMP18]], i64 3 ; CHECK-NEXT: br i1 [[TMP42]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26]] -; CHECK: pred.store.if25: +; CHECK: pred.store.if24: ; CHECK-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, ptr @b, i64 [[TMP43]] ; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 @@ -199,18 +193,12 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP47]], [[TMP45]] ; CHECK-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]] -; CHECK: pred.store.continue26: +; CHECK: pred.store.continue25: ; CHECK-NEXT: [[INDEX_NEXT29]] = add nuw i64 [[INDEX16]], 4 ; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC12]] -; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK28:%.*]], label [[VECTOR_BODY13]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: middle.block28: -; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: scalar.ph7: +; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK27:%.*]], label [[VECTOR_BODY13]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block27: ; CHECK-NEXT: br label [[DOTLR_PH1:%.*]] -; CHECK: .lr.ph5: -; CHECK-NEXT: br i1 poison, label [[DOT_PREHEADER_CRIT_EDGE]], label [[DOTLR_PH5]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOTLR_PH]], label [[DOTLR_PH1]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: @@ -328,11 +316,7 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: @@ -418,12 +402,8 @@ define void @example23b(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[TMP5:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[TMP4:%.*]] ; CHECK: 4: -; CHECK-NEXT: br i1 poison, label [[TMP5]], label [[TMP4]] -; CHECK: 5: ; CHECK-NEXT: ret void ; br label %1 @@ -516,12 +496,8 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[TMP26:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[TMP25:%.*]] ; CHECK: 25: -; CHECK-NEXT: br i1 poison, label [[TMP26]], label [[TMP25]] -; CHECK: 26: ; CHECK-NEXT: ret void ; br label %1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index 931c927..15e2678 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -15,17 +15,17 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[ITER_CHECK:.*]]: ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 ; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J]] to i64 -; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]] -; CHECK: [[VECTOR_PH1]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP144:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP144:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -184,15 +184,15 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX8]]) ; CHECK-NEXT: br i1 false, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: br i1 false, label %[[SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP171:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 ; CHECK-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 ; CHECK-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 @@ -216,20 +216,20 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP168]] = add <4 x i32> [[TMP167]], [[TMP166]] ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 4 ; CHECK-NEXT: [[TMP169:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 100 -; CHECK-NEXT: br i1 [[TMP169]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP169]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]]) -; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: br label %[[FOR_BODY1:.*]] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_COND_CLEANUP]]: -; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY1]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ADD7_LCSSA]] -; CHECK: [[FOR_BODY1]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY1]] ] -; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY1]] ] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]] @@ -239,24 +239,24 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; ; MAX-BW-LABEL: define i32 @matrix_row_col( ; MAX-BW-SAME: ptr readonly captures(none) [[DATA:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; MAX-BW-NEXT: [[ITER_CHECK:.*]]: ; MAX-BW-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 ; MAX-BW-NEXT: [[IDXPROM5:%.*]] = sext i32 [[J]] to i64 -; MAX-BW-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; MAX-BW-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; MAX-BW: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; MAX-BW-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; MAX-BW: [[VECTOR_PH]]: -; MAX-BW-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]] -; MAX-BW: [[VECTOR_PH1]]: ; MAX-BW-NEXT: br label %[[VECTOR_BODY:.*]] ; MAX-BW: [[VECTOR_BODY]]: -; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; MAX-BW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP144:%.*]], %[[VECTOR_BODY]] ] -; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] -; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] -; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH1]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] +; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP144:%.*]], %[[VECTOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] ; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -415,15 +415,15 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP149:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[BIN_RDX8]]) ; MAX-BW-NEXT: br i1 false, label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; MAX-BW: [[VEC_EPILOG_ITER_CHECK]]: -; MAX-BW-NEXT: br i1 false, label %[[SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; MAX-BW-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] ; MAX-BW: [[VEC_EPILOG_PH]]: -; MAX-BW-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ] -; MAX-BW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ] +; MAX-BW-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; MAX-BW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; MAX-BW-NEXT: [[TMP171:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 -; MAX-BW-NEXT: br label %[[FOR_BODY:.*]] -; MAX-BW: [[FOR_BODY]]: -; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[FOR_BODY]] ] -; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[FOR_BODY]] ] +; MAX-BW-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; MAX-BW: [[VEC_EPILOG_VECTOR_BODY]]: +; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; MAX-BW-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 ; MAX-BW-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 ; MAX-BW-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 @@ -447,20 +447,20 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP168]] = add <4 x i32> [[TMP167]], [[TMP166]] ; MAX-BW-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX9]], 4 ; MAX-BW-NEXT: [[TMP169:%.*]] = icmp eq i64 [[INDEX_NEXT12]], 100 -; MAX-BW-NEXT: br i1 [[TMP169]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; MAX-BW-NEXT: br i1 [[TMP169]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; MAX-BW: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAX-BW-NEXT: [[TMP170:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP168]]) -; MAX-BW-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[SCALAR_PH]] -; MAX-BW: [[SCALAR_PH]]: +; MAX-BW-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]] +; MAX-BW: [[VEC_EPILOG_SCALAR_PH]]: ; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; MAX-BW-NEXT: br label %[[FOR_BODY1:.*]] +; MAX-BW-NEXT: br label %[[FOR_BODY:.*]] ; MAX-BW: [[FOR_COND_CLEANUP]]: -; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY1]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; MAX-BW-NEXT: ret i32 [[ADD7_LCSSA]] -; MAX-BW: [[FOR_BODY1]]: -; MAX-BW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY1]] ] -; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY1]] ] +; MAX-BW: [[FOR_BODY]]: +; MAX-BW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ] ; MAX-BW-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]] ; MAX-BW-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]] @@ -470,7 +470,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]] ; MAX-BW-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; MAX-BW-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100 -; MAX-BW-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] +; MAX-BW-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; entry: %idxprom = sext i32 %i to i64 @@ -555,26 +555,9 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK-NEXT: store i8 [[TMP35]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 -; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0 -; CHECK-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]] -; CHECK-NEXT: [[IN1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_1]] -; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[IN0]], align 4 -; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[IN1]], align 4 -; CHECK-NEXT: [[REDUCE_ADD_0:%.*]] = add i32 [[V0]], [[V1]] -; CHECK-NEXT: [[REDUCE_ADD_0_NARROW:%.*]] = trunc i32 [[REDUCE_ADD_0]] to i8 -; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[IV_0]] -; CHECK-NEXT: store i8 [[REDUCE_ADD_0_NARROW]], ptr [[OUT]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_0]], 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] ; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void ; @@ -675,26 +658,9 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW-NEXT: store i8 [[TMP67]], ptr [[TMP51]], align 1 ; MAX-BW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; MAX-BW-NEXT: [[TMP68:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 -; MAX-BW-NEXT: br i1 [[TMP68]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; MAX-BW-NEXT: br i1 [[TMP68]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; MAX-BW: [[MIDDLE_BLOCK]]: ; MAX-BW-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; MAX-BW: [[SCALAR_PH:.*]]: -; MAX-BW-NEXT: br label %[[FOR_BODY:.*]] -; MAX-BW: [[FOR_BODY]]: -; MAX-BW-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; MAX-BW-NEXT: [[IV_0:%.*]] = add nuw nsw i64 [[IV]], 0 -; MAX-BW-NEXT: [[IV_1:%.*]] = add nuw nsw i64 [[IV]], 1 -; MAX-BW-NEXT: [[IN0:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_0]] -; MAX-BW-NEXT: [[IN1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[IV_1]] -; MAX-BW-NEXT: [[V0:%.*]] = load i32, ptr [[IN0]], align 4 -; MAX-BW-NEXT: [[V1:%.*]] = load i32, ptr [[IN1]], align 4 -; MAX-BW-NEXT: [[REDUCE_ADD_0:%.*]] = add i32 [[V0]], [[V1]] -; MAX-BW-NEXT: [[REDUCE_ADD_0_NARROW:%.*]] = trunc i32 [[REDUCE_ADD_0]] to i8 -; MAX-BW-NEXT: [[OUT:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[IV_0]] -; MAX-BW-NEXT: store i8 [[REDUCE_ADD_0_NARROW]], ptr [[OUT]], align 1 -; MAX-BW-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV_0]], 2 -; MAX-BW-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 1024 -; MAX-BW-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP]] ; MAX-BW: [[FOR_COND_CLEANUP]]: ; MAX-BW-NEXT: ret void ; @@ -745,9 +711,10 @@ attributes #0 = { "target-cpu"="core-avx2" "target-features"="+avx,+avx2,+sse,+s ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]]} ; CHECK: [[META6]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]], [[META7]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]], [[META6]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META6]], [[META7]]} +; CHECK: [[PROF8]] = !{!"branch_weights", i32 4, i32 28} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META6]], [[META7]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META7]], [[META6]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META6]], [[META7]]} ;. ; MAX-BW: [[INT_TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} ; MAX-BW: [[META2]] = !{!"int", [[META3:![0-9]+]], i64 0} @@ -756,7 +723,8 @@ attributes #0 = { "target-cpu"="core-avx2" "target-features"="+avx,+avx2,+sse,+s ; MAX-BW: [[LOOP5]] = distinct !{[[LOOP5]], [[META6:![0-9]+]], [[META7:![0-9]+]]} ; MAX-BW: [[META6]] = !{!"llvm.loop.isvectorized", i32 1} ; MAX-BW: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"} -; MAX-BW: [[LOOP8]] = distinct !{[[LOOP8]], [[META6]], [[META7]]} -; MAX-BW: [[LOOP9]] = distinct !{[[LOOP9]], [[META7]], [[META6]]} -; MAX-BW: [[LOOP10]] = distinct !{[[LOOP10]], [[META6]], [[META7]]} +; MAX-BW: [[PROF8]] = !{!"branch_weights", i32 4, i32 28} +; MAX-BW: [[LOOP9]] = distinct !{[[LOOP9]], [[META6]], [[META7]]} +; MAX-BW: [[LOOP10]] = distinct !{[[LOOP10]], [[META7]], [[META6]]} +; MAX-BW: [[LOOP11]] = distinct !{[[LOOP11]], [[META6]], [[META7]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll index 669e925..7069534 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -28,23 +28,9 @@ define dso_local void @tail_folding_enabled(ptr noalias nocapture %A, ptr noalia ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 430 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: br label %for.body @@ -89,25 +75,11 @@ define dso_local void @tail_folding_disabled(ptr noalias nocapture %A, ptr noali ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP6]], ptr [[TMP7]], i32 4, <8 x i1> [[TMP1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 430 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; entry: br label %for.body @@ -170,28 +142,12 @@ define i32 @reduction_i32(ptr nocapture readonly %A, ptr nocapture readonly %B, ; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP10]], <8 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP11]]) -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ [[SUM_1:%.*]], [[FOR_BODY]] ], [ 0, [[SCALAR_PH]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDXA]], align 4 -; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDXB]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] -; CHECK-NEXT: [[SUM_1]] = add nuw nsw i32 [[ADD]], [[SUM_0]] -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_1_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP13]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index 27150cb..63f9a13 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -398,27 +398,9 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[BIN_RDX13:%.*]] = add <4 x i32> [[TMP38]], [[BIN_RDX]] ; CHECK-NEXT: [[BIN_RDX14:%.*]] = add <4 x i32> [[TMP39]], [[BIN_RDX13]] ; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX14]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[BYTE:%.*]] = udiv i64 [[IV]], 8 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[BYTE]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i8, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: [[BIT:%.*]] = urem i64 [[IV]], 8 -; CHECK-NEXT: [[BIT_TRUNC:%.*]] = trunc i64 [[BIT]] to i8 -; CHECK-NEXT: [[MASK:%.*]] = lshr i8 [[EARLYCND]], [[BIT_TRUNC]] -; CHECK-NEXT: [[TEST:%.*]] = and i8 [[MASK]], 1 -; CHECK-NEXT: [[VAL:%.*]] = zext i8 [[TEST]] to i32 -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4094 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LOOP]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP41]] ; entry: %alloca = alloca [4096 x i32] diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index 3ae8001..28de5c7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -141,20 +141,7 @@ define void @vectorized1(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP9]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 20 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -200,22 +187,9 @@ define void @vectorized2(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: store <8 x float> [[TMP5]], ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP7]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 16 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll index 282e9a5..1e94f83 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll @@ -38,21 +38,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: -; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; IF-EVL: scalar.ph: ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] -; IF-EVL: for.body: -; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] -; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 -; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; IF-EVL: for.cond.cleanup: ; IF-EVL-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll index 69cdd65..455fe83 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll @@ -74,22 +74,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10008 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_MUL:%.*]] = mul nuw i64 [[IV]], 6 -; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store i8 [[L]], ptr [[B]], align 1, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ARRAYIDX77:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[IV_MUL]] -; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX77]], align 1, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[IV_MUL]], 2 -; CHECK-NEXT: [[ARRAYIDX97:%.*]] = getelementptr i8, ptr [[L_OUT]], i64 [[ADD_2]] -; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX97]], align 1, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 10000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index bdedcca..9ea9e11 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -48,25 +48,7 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[G_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[G_SRC]], align 8 -; CHECK-NEXT: [[IV_4:%.*]] = add nuw nsw i64 [[IV]], 4 -; CHECK-NEXT: [[C:%.*]] = icmp ule i64 [[L]], 128 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_THEN:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.then: -; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV_4]], 1 -; CHECK-NEXT: [[G_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OR]] -; CHECK-NEXT: store i64 [[IV_4]], ptr [[G_DST]], align 4 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 32 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -131,25 +113,7 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[G_SRC:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[G_SRC]], align 8 -; CHECK-NEXT: [[IV_4:%.*]] = add nuw nsw i64 [[IV]], 4 -; CHECK-NEXT: [[C:%.*]] = icmp ule i64 [[L]], 128 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_THEN:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.then: -; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV_4]], 1 -; CHECK-NEXT: [[G_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[OR]] -; CHECK-NEXT: store i64 [[L]], ptr [[G_DST]], align 4 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 32 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index f9403b8..774f0db 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -134,30 +134,9 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; SINK-GATHER-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SINK-GATHER: middle.block: ; SINK-GATHER-NEXT: [[TMP49:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP47]]) -; SINK-GATHER-NEXT: br label [[FOR_END:%.*]] -; SINK-GATHER: scalar.ph: -; SINK-GATHER-NEXT: br label [[FOR_BODY:%.*]] -; SINK-GATHER: for.body: -; SINK-GATHER-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] -; SINK-GATHER-NEXT: [[R:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[T7:%.*]], [[FOR_INC]] ] -; SINK-GATHER-NEXT: [[T0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] -; SINK-GATHER-NEXT: [[T1:%.*]] = load i32, ptr [[T0]], align 4 -; SINK-GATHER-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; SINK-GATHER: if.then: -; SINK-GATHER-NEXT: [[T2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]] -; SINK-GATHER-NEXT: [[T3:%.*]] = load i32, ptr [[T2]], align 4 -; SINK-GATHER-NEXT: [[T4:%.*]] = sdiv i32 [[T3]], [[X]] -; SINK-GATHER-NEXT: [[T5:%.*]] = add nsw i32 [[T4]], [[T1]] -; SINK-GATHER-NEXT: br label [[FOR_INC]] -; SINK-GATHER: for.inc: -; SINK-GATHER-NEXT: [[T6:%.*]] = phi i32 [ [[T1]], [[FOR_BODY]] ], [ [[T5]], [[IF_THEN]] ] -; SINK-GATHER-NEXT: [[T7]] = add i32 [[R]], [[T6]] -; SINK-GATHER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 -; SINK-GATHER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 10000 -; SINK-GATHER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]] +; SINK-GATHER-NEXT: br label [[FOR_INC:%.*]] ; SINK-GATHER: for.end: -; SINK-GATHER-NEXT: [[T8:%.*]] = phi i32 [ [[T7]], [[FOR_INC]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] -; SINK-GATHER-NEXT: ret i32 [[T8]] +; SINK-GATHER-NEXT: ret i32 [[TMP49]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll index afdbfaa..f64255f 100644 --- a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll +++ b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll @@ -37,11 +37,7 @@ define i32 @foo(ptr nocapture %A) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/LoopVectorize/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/check-prof-info.ll index ce9d1f2..b59ad84 100644 --- a/llvm/test/Transforms/LoopVectorize/check-prof-info.ll +++ b/llvm/test/Transforms/LoopVectorize/check-prof-info.ll @@ -19,12 +19,8 @@ define void @_Z3foov() { ; CHECK: vector.body: ; CHECK: br i1 [[TMP6:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: middle.block: -; CHECK: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK: for.body: -; CHECK: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF5:![0-9]+]] ; ; CHECK-MASKED-LABEL: @_Z3foov( ; CHECK-MASKED: entry: @@ -34,12 +30,8 @@ define void @_Z3foov() { ; CHECK-MASKED: vector.body: ; CHECK-MASKED: br i1 [[TMP18:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-MASKED: middle.block: -; CHECK-MASKED: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK-MASKED: scalar.ph: ; CHECK-MASKED: br label [[FOR_BODY:%.*]] ; CHECK-MASKED: for.cond.cleanup: -; CHECK-MASKED: for.body: -; CHECK-MASKED: br i1 [[EXITCOND:%.*]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !prof [[PROF5:![0-9]+]] ; ; CHECK-SCALABLE-LABEL: @_Z3foov( ; CHECK-SCALABLE: entry: diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll index bd0655d..143a0af 100644 --- a/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll +++ b/llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll @@ -19,19 +19,6 @@ define void @test(ptr %data) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[OR_IV_1:%.*]] = or disjoint i64 [[IV]], 1 -; CHECK-NEXT: [[GEP_POSTSCALE:%.*]] = getelementptr [64 x float], ptr @postscale, i64 0, i64 [[OR_IV_1]] -; CHECK-NEXT: [[LOAD_POSTSCALE:%.*]] = load float, ptr [[GEP_POSTSCALE]], align 4, !tbaa [[FLOAT_TBAA0]] -; CHECK-NEXT: [[LRINT:%.*]] = tail call i64 @llvm.lrint.i64.f32(float [[LOAD_POSTSCALE]]) -; CHECK-NEXT: [[LRINT_TRUNC:%.*]] = trunc i64 [[LRINT]] to i16 -; CHECK-NEXT: store i16 [[LRINT_TRUNC]], ptr [[DATA]], align 2, !tbaa [[SHORT_TBAA4]] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 8 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/constantfolder.ll b/llvm/test/Transforms/LoopVectorize/constantfolder.ll index 37f2e73..66592b0 100644 --- a/llvm/test/Transforms/LoopVectorize/constantfolder.ll +++ b/llvm/test/Transforms/LoopVectorize/constantfolder.ll @@ -16,20 +16,6 @@ define void @const_fold_ptradd(ptr %dst, i64 %d) { ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[CONST_0]] -; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -70,20 +56,6 @@ define void @const_fold_inbounds_ptradd(ptr %dst, i64 %d) { ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[CONST_0]] -; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -125,20 +97,6 @@ define void @const_fold_select(ptr %dst, i64 %d) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[OR:%.*]] = or i64 [[D]], [[CONST_1]] -; CHECK-NEXT: store i64 [[OR]], ptr [[DST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -179,24 +137,6 @@ define void @const_fold_add_sub_mul_ashr_lshr(ptr %dst, i64 %d) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[ADD:%.*]] = add i64 2, [[CONST_1]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[ADD]], [[CONST_1]] -; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SUB]], [[CONST_1]] -; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[ASHR]], 3 -; CHECK-NEXT: [[LSHR:%.*]] = lshr i64 [[MUL]], [[CONST_1]] -; CHECK-NEXT: store i64 [[LSHR]], ptr [[DST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -241,22 +181,6 @@ define void @const_fold_and_or_xor(ptr %dst, i64 %d) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[OR:%.*]] = or i64 2, [[CONST_1]] -; CHECK-NEXT: [[AND:%.*]] = and i64 [[OR]], [[CONST_1]] -; CHECK-NEXT: [[XOR:%.*]] = and i64 [[AND]], [[CONST_1]] -; CHECK-NEXT: store i64 [[XOR]], ptr [[DST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -299,21 +223,6 @@ define void @const_fold_cmp_zext(ptr %dst, i64 %d) { ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[CONST_1:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 1, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[VAL:%.*]] = icmp ugt i64 2, [[CONST_1]] -; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[VAL]] to i8 -; CHECK-NEXT: store i8 [[ZEXT]], ptr [[DST]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -355,20 +264,6 @@ define void @const_fold_trunc(ptr %dst, i64 %d) { ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[CONST_0:%.*]] = phi i64 [ [[D]], %[[ELSE]] ], [ 0, %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[CONST_0]] to i16 -; CHECK-NEXT: store i16 [[TRUNC]], ptr [[DST]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll index 33e688c..62399c5 100644 --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -67,19 +67,7 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[L2_HEADER_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[L2_INNER_HEADER:%.*]] -; CHECK: L2.Inner.header: -; CHECK-NEXT: [[L2_ACCUM:%.*]] = phi i32 [ [[L2_ACCUM_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ 1, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[L2_IV:%.*]] = phi i64 [ [[L2_IV_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ 1, [[SCALAR_PH]] ] -; CHECK-NEXT: [[L2_ACCUM_NEXT]] = sub i32 [[L2_ACCUM]], [[L1_EXIT_VAL]] -; CHECK-NEXT: [[L2_DUMMY_BUT_NEED_IT:%.*]] = sext i32 [[L2_ACCUM_NEXT]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[L2_IV]] -; CHECK-NEXT: store i64 [[L2_DUMMY_BUT_NEED_IT]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[L2_IV_NEXT]] = add nuw nsw i64 [[L2_IV]], 1 -; CHECK-NEXT: [[L2_EXIT_COND:%.*]] = icmp ugt i64 [[L2_IV]], 11 -; CHECK-NEXT: br i1 [[L2_EXIT_COND]], label [[L2_HEADER_LOOPEXIT]], label [[L2_INNER_HEADER]] ; CHECK: L2.exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index 0a8e9dc..02e1d0e 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -94,20 +94,8 @@ define void @pr47390(ptr %a) { ; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[PRIMARY:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[PRIMARY_ADD:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[USE_PRIMARY:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[PRIMARY]], %[[LOOP]] ] -; CHECK-NEXT: [[SECONDARY:%.*]] = phi i32 [ 1, %[[SCALAR_PH]] ], [ [[SECONDARY_ADD:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[PRIMARY_ADD]] = add i32 [[PRIMARY]], 1 -; CHECK-NEXT: [[SECONDARY_ADD]] = add i32 [[SECONDARY]], 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[SECONDARY]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SECONDARY]], 5 -; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll index f61478b..b31b732 100644 --- a/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll +++ b/llvm/test/Transforms/LoopVectorize/debugloc-optimize-vfuf-term.ll @@ -15,15 +15,6 @@ define i32 @foo(ptr %p) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]], !dbg [[DBG3]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[SCALAR_PH]] ], !dbg [[DBG7:![0-9]+]] -; CHECK-NEXT: [[CONV:%.*]] = trunc i64 0 to i8, !dbg [[DBG8:![0-9]+]] -; CHECK-NEXT: store i8 [[CONV]], ptr [[P]], align 1, !dbg [[DBG3]] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1, !dbg [[DBG9:![0-9]+]] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1, !dbg [[DBG10:![0-9]+]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG11:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret i32 0 ; @@ -64,9 +55,4 @@ exit: ; preds = %loop ; CHECK: [[META4]] = distinct !DISubprogram(name: "foo", scope: [[META1]], file: [[META1]], line: 11, type: [[META5:![0-9]+]], spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META6:![0-9]+]]) ; CHECK: [[META5]] = distinct !DISubroutineType(types: [[META6]]) ; CHECK: [[META6]] = !{} -; CHECK: [[DBG7]] = !DILocation(line: 4, scope: [[META4]]) -; CHECK: [[DBG8]] = !DILocation(line: 5, scope: [[META4]]) -; CHECK: [[DBG9]] = !DILocation(line: 7, scope: [[META4]]) -; CHECK: [[DBG10]] = !DILocation(line: 8, scope: [[META4]]) -; CHECK: [[DBG11]] = !DILocation(line: 9, scope: [[META4]]) ;. diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll index d97624f..274bd04 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -24,16 +24,7 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1800 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[RIV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]] -; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 1800 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll index 4f5a26e9..156c2bd 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -198,16 +198,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[RIV:%.*]] = phi i32 [ [[RIVPLUS1:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]] -; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[EXIT_LOOPEXIT]], label [[LOOP]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index ff2baec..eca39e6 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -108,25 +108,8 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 -; CHECK-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true -; CHECK-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]] -; CHECK-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]] -; CHECK-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32 -; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 -; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] -; CHECK-NEXT: store i32 0, ptr [[A_GEP]], align 4 -; CHECK-NEXT: br i1 [[VEC_DEAD]], label %[[FOR_END]], label %[[LOOP]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[FOR_LCSSA]] +; CHECK-NEXT: ret i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll index fd19760..ebfe16b 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-interleave-only.ll @@ -22,21 +22,8 @@ define float @for_load_interleave_only(ptr %src) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[SRC]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 16 -; CHECK-NEXT: [[L]] = load float, ptr [[PTR_IV]], align 4 -; CHECK-NEXT: store float 0.000000e+00, ptr [[PTR_IV]], align 4 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[FOR_LCSSA:%.*]] = phi float [ [[FOR]], %[[LOOP]] ], [ [[TMP2]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[FOR_LCSSA]] +; CHECK-NEXT: ret float [[TMP2]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll index 149157a..7412980 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -119,22 +119,7 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SCALAR_RECUR6:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[NEG:%.*]] = fneg float [[SCALAR_RECUR6]] -; CHECK-NEXT: [[MULADD:%.*]] = call float @llvm.fmuladd.f32(float [[SCALAR_RECUR]], float [[NEG]], float 0.000000e+00) -; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[FOR_1_NEXT]] = load float, ptr [[A]], align 4 -; CHECK-NEXT: [[FOR_2_NEXT]] = load float, ptr [[B]], align 4 -; CHECK-NEXT: store float [[MULADD]], ptr [[DST_GEP]], align 4 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 10000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 443e44b..bd0c098 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -1193,19 +1193,9 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]] -; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] -; UNROLL-NO-IC: scalar.body: -; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1 -; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 -; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000 -; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]] ; UNROLL-NO-IC: for.end: -; UNROLL-NO-IC-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: ret i64 [[VAR2_LCSSA]] +; UNROLL-NO-IC-NEXT: ret i64 1 ; ; UNROLL-NO-VF-LABEL: @constant_folded_previous_value( ; UNROLL-NO-VF-NEXT: entry: @@ -1218,19 +1208,9 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NO-VF-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: br label [[FOR_END:%.*]] -; UNROLL-NO-VF: scalar.ph: ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] -; UNROLL-NO-VF: scalar.body: -; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VAR2:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1 -; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 -; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000 -; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]] ; UNROLL-NO-VF: for.end: -; UNROLL-NO-VF-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ] -; UNROLL-NO-VF-NEXT: ret i64 [[VAR2_LCSSA]] +; UNROLL-NO-VF-NEXT: ret i64 1 ; ; SINK-AFTER-LABEL: @constant_folded_previous_value( ; SINK-AFTER-NEXT: entry: @@ -1243,19 +1223,9 @@ define i64 @constant_folded_previous_value() { ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: br label [[FOR_END:%.*]] -; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] -; SINK-AFTER: scalar.body: -; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] -; SINK-AFTER-NEXT: [[VAR2:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ] -; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1 -; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 -; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], 1000 -; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]] ; SINK-AFTER: for.end: -; SINK-AFTER-NEXT: [[VAR2_LCSSA:%.*]] = phi i64 [ [[VAR2]], [[SCALAR_BODY]] ], [ 1, [[MIDDLE_BLOCK]] ] -; SINK-AFTER-NEXT: ret i64 [[VAR2_LCSSA]] +; SINK-AFTER-NEXT: ret i64 1 ; entry: br label %scalar.body @@ -2725,21 +2695,9 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]] ; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) -; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]] -; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-IC: bb1: -; UNROLL-NO-IC-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: ret i32 [[VAR]] -; UNROLL-NO-IC: bb2: -; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[Y]], [[SCALAR_PH:%.*]] ] -; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] -; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] -; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 -; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF27:![0-9]+]] +; UNROLL-NO-IC-NEXT: ret i32 [[TMP51]] ; ; UNROLL-NO-VF-LABEL: @sink_into_replication_region( ; UNROLL-NO-VF-NEXT: bb: @@ -2785,21 +2743,9 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25:![0-9]+]], !llvm.loop [[LOOP26:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]] -; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]] -; UNROLL-NO-VF: scalar.ph: ; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-VF: bb1: -; UNROLL-NO-VF-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-VF-NEXT: ret i32 [[VAR]] -; UNROLL-NO-VF: bb2: -; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[Y]], [[SCALAR_PH:%.*]] ] -; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] -; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] -; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 -; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF27:![0-9]+]] +; UNROLL-NO-VF-NEXT: ret i32 [[BIN_RDX]] ; ; SINK-AFTER-LABEL: @sink_into_replication_region( ; SINK-AFTER-NEXT: bb: @@ -2868,21 +2814,9 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25:![0-9]+]], !llvm.loop [[LOOP26:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]]) -; SINK-AFTER-NEXT: br label [[BB1:%.*]] -; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: br label [[BB2:%.*]] ; SINK-AFTER: bb1: -; SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] -; SINK-AFTER-NEXT: ret i32 [[VAR]] -; SINK-AFTER: bb2: -; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[Y]], [[SCALAR_PH:%.*]] ] -; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] -; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] -; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 -; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF27:![0-9]+]] +; SINK-AFTER-NEXT: ret i32 [[TMP27]] ; bb: br label %bb2 @@ -3078,25 +3012,9 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP73]], [[TMP72]] ; UNROLL-NO-IC-NEXT: [[TMP75:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) -; UNROLL-NO-IC-NEXT: br label [[BB1:%.*]] -; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-IC: bb1: -; UNROLL-NO-IC-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP75]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: ret i32 [[VAR]] -; UNROLL-NO-IC: bb2: -; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[Y]], [[SCALAR_PH:%.*]] ] -; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]] -; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] -; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] -; UNROLL-NO-IC-NEXT: store i32 [[VAR3]], ptr [[G]], align 4 -; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 -; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 -; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF27]] +; UNROLL-NO-IC-NEXT: ret i32 [[TMP75]] ; ; UNROLL-NO-VF-LABEL: @sink_into_replication_region_multiple( ; UNROLL-NO-VF-NEXT: bb: @@ -3155,25 +3073,9 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-VF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP17]], [[TMP16]] -; UNROLL-NO-VF-NEXT: br label [[BB1:%.*]] -; UNROLL-NO-VF: scalar.ph: ; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-VF: bb1: -; UNROLL-NO-VF-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-VF-NEXT: ret i32 [[VAR]] -; UNROLL-NO-VF: bb2: -; UNROLL-NO-VF-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[Y]], [[SCALAR_PH:%.*]] ] -; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]] -; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] -; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] -; UNROLL-NO-VF-NEXT: store i32 [[VAR3]], ptr [[G]], align 4 -; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 -; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 -; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF27]] +; UNROLL-NO-VF-NEXT: ret i32 [[BIN_RDX]] ; ; SINK-AFTER-LABEL: @sink_into_replication_region_multiple( ; SINK-AFTER-NEXT: bb: @@ -3273,25 +3175,9 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF25]], !llvm.loop [[LOOP28:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]]) -; SINK-AFTER-NEXT: br label [[BB1:%.*]] -; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: br label [[BB2:%.*]] ; SINK-AFTER: bb1: -; SINK-AFTER-NEXT: [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] -; SINK-AFTER-NEXT: ret i32 [[VAR]] -; SINK-AFTER: bb2: -; SINK-AFTER-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[Y]], [[SCALAR_PH:%.*]] ] -; SINK-AFTER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[VAR4:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ 0, [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[IV]] -; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] -; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] -; SINK-AFTER-NEXT: store i32 [[VAR3]], ptr [[G]], align 4 -; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 -; SINK-AFTER-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 -; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF27]] +; SINK-AFTER-NEXT: ret i32 [[TMP39]] ; bb: br label %bb2 @@ -3341,26 +3227,9 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 -; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]] -; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] -; UNROLL-NO-IC: loop: -; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 -; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true -; UNROLL-NO-IC-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true -; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; UNROLL-NO-IC-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]] -; UNROLL-NO-IC-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]] -; UNROLL-NO-IC-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32 -; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 -; UNROLL-NO-IC-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] -; UNROLL-NO-IC-NEXT: store i32 0, ptr [[A_GEP]], align 4 -; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]] ; UNROLL-NO-IC: for.end: -; UNROLL-NO-IC-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: ret i32 [[FOR_LCSSA]] +; UNROLL-NO-IC-NEXT: ret i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]] ; ; UNROLL-NO-VF-LABEL: @sink_after_dead_inst( ; UNROLL-NO-VF-NEXT: entry: @@ -3382,26 +3251,9 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP7]], 16 ; UNROLL-NO-VF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: br label [[FOR_END:%.*]] -; UNROLL-NO-VF: scalar.ph: ; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]] -; UNROLL-NO-VF: loop: -; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-VF-NEXT: [[FOR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] -; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 -; UNROLL-NO-VF-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true -; UNROLL-NO-VF-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true -; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; UNROLL-NO-VF-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]] -; UNROLL-NO-VF-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]] -; UNROLL-NO-VF-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32 -; UNROLL-NO-VF-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 -; UNROLL-NO-VF-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] -; UNROLL-NO-VF-NEXT: store i32 0, ptr [[A_GEP]], align 4 -; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]] ; UNROLL-NO-VF: for.end: -; UNROLL-NO-VF-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-VF-NEXT: ret i32 [[FOR_LCSSA]] +; UNROLL-NO-VF-NEXT: ret i32 [[TMP10]] ; ; SINK-AFTER-LABEL: @sink_after_dead_inst( ; SINK-AFTER-NEXT: entry: @@ -3423,26 +3275,9 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 -; SINK-AFTER-NEXT: br label [[FOR_END:%.*]] -; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: br label [[LOOP:%.*]] -; SINK-AFTER: loop: -; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; SINK-AFTER-NEXT: [[FOR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] -; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 -; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true -; SINK-AFTER-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true -; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; SINK-AFTER-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]] -; SINK-AFTER-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]] -; SINK-AFTER-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32 -; SINK-AFTER-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 -; SINK-AFTER-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] -; SINK-AFTER-NEXT: store i32 0, ptr [[A_GEP]], align 4 -; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]] ; SINK-AFTER: for.end: -; SINK-AFTER-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] -; SINK-AFTER-NEXT: ret i32 [[FOR_LCSSA]] +; SINK-AFTER-NEXT: ret i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/flags.ll b/llvm/test/Transforms/LoopVectorize/flags.ll index cb86f5f..2268085 100644 --- a/llvm/test/Transforms/LoopVectorize/flags.ll +++ b/llvm/test/Transforms/LoopVectorize/flags.ll @@ -129,20 +129,8 @@ define float @fast_math(ptr noalias %s) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP1]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[S]], i64 [[IV]] -; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = fadd fast float [[RED]], [[TMP4]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 256 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[LOOP]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[ADD_LCSSA]] +; CHECK-NEXT: ret float [[TMP3]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 901f67e..f56699a 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -1649,11 +1649,7 @@ define i32 @float_induction_with_dbg_on_fadd(ptr %dst) { ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; VEC4_INTERL1-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VEC4_INTERL1: middle.block: -; VEC4_INTERL1-NEXT: br label [[EXIT:%.*]] -; VEC4_INTERL1: scalar.ph: ; VEC4_INTERL1-NEXT: br label [[LOOP:%.*]] -; VEC4_INTERL1: loop: -; VEC4_INTERL1-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; VEC4_INTERL1: exit: ; VEC4_INTERL1-NEXT: ret i32 0 ; @@ -1672,11 +1668,7 @@ define i32 @float_induction_with_dbg_on_fadd(ptr %dst) { ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; VEC4_INTERL2-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VEC4_INTERL2: middle.block: -; VEC4_INTERL2-NEXT: br label [[EXIT:%.*]] -; VEC4_INTERL2: scalar.ph: ; VEC4_INTERL2-NEXT: br label [[LOOP:%.*]] -; VEC4_INTERL2: loop: -; VEC4_INTERL2-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; VEC4_INTERL2: exit: ; VEC4_INTERL2-NEXT: ret i32 0 ; @@ -1699,11 +1691,7 @@ define i32 @float_induction_with_dbg_on_fadd(ptr %dst) { ; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; VEC1_INTERL2-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VEC1_INTERL2: middle.block: -; VEC1_INTERL2-NEXT: br label [[EXIT:%.*]] -; VEC1_INTERL2: scalar.ph: ; VEC1_INTERL2-NEXT: br label [[LOOP:%.*]] -; VEC1_INTERL2: loop: -; VEC1_INTERL2-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; VEC1_INTERL2: exit: ; VEC1_INTERL2-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll index 93031c7..555e695 100644 --- a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll +++ b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll @@ -66,22 +66,9 @@ define float @minloopattr(ptr nocapture readonly %arg) #0 { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP4]]) -; CHECK-NEXT: br label [[OUT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[T3:%.*]] = getelementptr float, ptr [[ARG]], i64 [[T1]] -; CHECK-NEXT: [[T4:%.*]] = load float, ptr [[T3]], align 4 -; CHECK-NEXT: [[T5:%.*]] = fcmp olt float [[T2]], [[T4]] -; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]] -; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1 -; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537 -; CHECK-NEXT: br i1 [[T8]], label [[OUT]], label [[LOOP]] ; CHECK: out: -; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[T6_LCSSA]] +; CHECK-NEXT: ret float [[TMP6]] ; top: %t = load float, ptr %arg diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index c86e271..f7376a0 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -67,23 +67,7 @@ define i32 @test(ptr nocapture %f) #0 { ; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NOSIMPLIFY: middle.block: -; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_END:%.*]] -; UNROLL-NOSIMPLIFY: scalar.ph: -; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]] -; UNROLL-NOSIMPLIFY: for.body: -; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[INDVARS_IV]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; UNROLL-NOSIMPLIFY-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 100 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; UNROLL-NOSIMPLIFY: if.then: -; UNROLL-NOSIMPLIFY-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], 20 -; UNROLL-NOSIMPLIFY-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC]] -; UNROLL-NOSIMPLIFY: for.inc: -; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] +; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC:%.*]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret i32 0 ; @@ -449,25 +433,7 @@ define void @minimal_bit_widths(i1 %c) { ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; UNROLL-NOSIMPLIFY: middle.block: -; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_END:%.*]] -; UNROLL-NOSIMPLIFY: scalar.ph: -; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]] -; UNROLL-NOSIMPLIFY: for.body: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_INC:%.*]] ], [ 0, [[SCALAR_PH:%.*]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 1000, [[SCALAR_PH]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr undef, i64 [[TMP1]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; UNROLL-NOSIMPLIFY: if.then: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP6]], ptr [[TMP3]], align 1 -; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC]] -; UNROLL-NOSIMPLIFY: for.inc: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP9]] = add nuw nsw i64 [[TMP1]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP2]], -1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]] +; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC:%.*]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret void ; @@ -575,26 +541,7 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) { ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; UNROLL-NOSIMPLIFY: middle.block: -; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_END:%.*]] -; UNROLL-NOSIMPLIFY: scalar.ph: -; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]] -; UNROLL-NOSIMPLIFY: for.body: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_INC:%.*]] ], [ 0, [[SCALAR_PH:%.*]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ 1000, [[SCALAR_PH]] ] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP1]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 1 -; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP3]], align 1 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; UNROLL-NOSIMPLIFY: if.then: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 -; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP6]], ptr [[TMP3]], align 1 -; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC]] -; UNROLL-NOSIMPLIFY: for.inc: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP9]] = add nuw nsw i64 [[TMP1]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP2]], -1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]] +; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC:%.*]] ; UNROLL-NOSIMPLIFY: for.end: ; UNROLL-NOSIMPLIFY-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll index f0b32c6..ccf05d7 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll @@ -24,17 +24,7 @@ define void @multiple_iv_uses_in_same_instruction(ptr %ptr) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR]], i64 0, i64 [[IV]], i64 [[IV]] -; CHECK-NEXT: [[T:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-NEXT: store i32 [[T]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/induction-step.ll b/llvm/test/Transforms/LoopVectorize/induction-step.ll index 362de0e..53d5ac4 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-step.ll @@ -291,18 +291,6 @@ define void @iv_no_binary_op_in_descriptor(i1 %c, ptr %dst) { ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT_P:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], 1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT_P]] = phi i64 [ [[IV_NEXT]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT_P]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 60c844c..cc55a51 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -2764,19 +2764,9 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]]) -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[SCALAR_PH:%.*]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[B_0:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4 -; CHECK-NEXT: [[B_NEXT]] = add i8 [[B_0]], -1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[B_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[A_0_AND_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP2]] ; ; IND-LABEL: @i8_loop( ; IND-NEXT: entry: @@ -2789,11 +2779,7 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; IND-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; IND-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; IND: middle.block: -; IND-NEXT: br label [[EXIT:%.*]] -; IND: scalar.ph: ; IND-NEXT: br label [[LOOP:%.*]] -; IND: loop: -; IND-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; IND: exit: ; IND-NEXT: ret i32 0 ; @@ -2808,11 +2794,7 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; UNROLL-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; UNROLL-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: br label [[EXIT:%.*]] -; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[LOOP:%.*]] -; UNROLL: loop: -; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; UNROLL: exit: ; UNROLL-NEXT: ret i32 0 ; @@ -2833,19 +2815,9 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP1]], [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) -; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]] -; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] -; UNROLL-NO-IC: loop: -; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[SCALAR_PH:%.*]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[B_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4 -; UNROLL-NO-IC-NEXT: [[B_NEXT]] = add i8 [[B_0]], -1 -; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i8 [[B_NEXT]], 0 -; UNROLL-NO-IC-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; UNROLL-NO-IC: exit: -; UNROLL-NO-IC-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: ret i32 [[A_0_AND_LCSSA]] +; UNROLL-NO-IC-NEXT: ret i32 [[TMP3]] ; ; INTERLEAVE-LABEL: @i8_loop( ; INTERLEAVE-NEXT: entry: @@ -2858,11 +2830,7 @@ define i32 @i8_loop() nounwind readnone ssp uwtable { ; INTERLEAVE-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; INTERLEAVE-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; INTERLEAVE: middle.block: -; INTERLEAVE-NEXT: br label [[EXIT:%.*]] -; INTERLEAVE: scalar.ph: ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] -; INTERLEAVE: loop: -; INTERLEAVE-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; INTERLEAVE: exit: ; INTERLEAVE-NEXT: ret i32 0 ; @@ -2897,19 +2865,9 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP0]]) -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[SCALAR_PH:%.*]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[B_0:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[B_0_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4 -; CHECK-NEXT: [[B_0_NEXT]] = add i16 [[B_0]], -1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[B_0_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[A_0_AND_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP2]] ; ; IND-LABEL: @i16_loop( ; IND-NEXT: entry: @@ -2922,11 +2880,7 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; IND-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 ; IND-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; IND: middle.block: -; IND-NEXT: br label [[EXIT:%.*]] -; IND: scalar.ph: ; IND-NEXT: br label [[LOOP:%.*]] -; IND: loop: -; IND-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; IND: exit: ; IND-NEXT: ret i32 0 ; @@ -2941,11 +2895,7 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; UNROLL-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 ; UNROLL-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: br label [[EXIT:%.*]] -; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[LOOP:%.*]] -; UNROLL: loop: -; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; UNROLL: exit: ; UNROLL-NEXT: ret i32 0 ; @@ -2966,19 +2916,9 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP1]], [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) -; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]] -; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] -; UNROLL-NO-IC: loop: -; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ 1, [[SCALAR_PH:%.*]] ], [ [[A_0_AND:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[B_0_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[A_0_AND]] = and i32 [[A_0]], 4 -; UNROLL-NO-IC-NEXT: [[B_0_NEXT]] = add i16 [[B_0]], -1 -; UNROLL-NO-IC-NEXT: [[EC:%.*]] = icmp eq i16 [[B_0_NEXT]], 0 -; UNROLL-NO-IC-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; UNROLL-NO-IC: exit: -; UNROLL-NO-IC-NEXT: [[A_0_AND_LCSSA:%.*]] = phi i32 [ [[A_0_AND]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: ret i32 [[A_0_AND_LCSSA]] +; UNROLL-NO-IC-NEXT: ret i32 [[TMP3]] ; ; INTERLEAVE-LABEL: @i16_loop( ; INTERLEAVE-NEXT: entry: @@ -2991,11 +2931,7 @@ define i32 @i16_loop() nounwind readnone ssp uwtable { ; INTERLEAVE-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 ; INTERLEAVE-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; INTERLEAVE: middle.block: -; INTERLEAVE-NEXT: br label [[EXIT:%.*]] -; INTERLEAVE: scalar.ph: ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] -; INTERLEAVE: loop: -; INTERLEAVE-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; INTERLEAVE: exit: ; INTERLEAVE-NEXT: ret i32 0 ; @@ -5025,28 +4961,9 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]]) -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ -20, [[SCALAR_PH:%.*]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[VAR0:%.*]] = phi i32 [ [[A]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ] -; CHECK-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16 -; CHECK-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0 -; CHECK-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]] -; CHECK: for.cond: -; CHECK-NEXT: [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]] -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ] -; CHECK-NEXT: [[VAR5:%.*]] = sext i16 [[VAR4]] to i32 -; CHECK-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]] -; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0 -; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[VAR7]] +; CHECK-NEXT: ret i32 [[TMP17]] ; ; IND-LABEL: @PR32419( ; IND-NEXT: entry: @@ -5086,15 +5003,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; IND: middle.block: -; IND-NEXT: br label [[FOR_END:%.*]] -; IND: scalar.ph: -; IND-NEXT: br label [[FOR_BODY:%.*]] -; IND: for.body: -; IND-NEXT: br i1 poison, label [[FOR_INC:%.*]], label [[FOR_COND:%.*]] -; IND: for.cond: -; IND-NEXT: br label [[FOR_INC]] -; IND: for.inc: -; IND-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] +; IND-NEXT: br label [[FOR_INC:%.*]] ; IND: for.end: ; IND-NEXT: [[VAR7:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP14]]) ; IND-NEXT: ret i32 [[VAR7]] @@ -5160,15 +5069,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NEXT: [[TMP28:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 ; UNROLL-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: br label [[FOR_END:%.*]] -; UNROLL: scalar.ph: -; UNROLL-NEXT: br label [[FOR_BODY:%.*]] -; UNROLL: for.body: -; UNROLL-NEXT: br i1 poison, label [[FOR_INC:%.*]], label [[FOR_COND:%.*]] -; UNROLL: for.cond: -; UNROLL-NEXT: br label [[FOR_INC]] -; UNROLL: for.inc: -; UNROLL-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] +; UNROLL-NEXT: br label [[FOR_INC:%.*]] ; UNROLL: for.end: ; UNROLL-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP27]], [[TMP26]] ; UNROLL-NEXT: [[VAR7:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]]) @@ -5239,28 +5140,9 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP29]], [[TMP28]] ; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]]) -; UNROLL-NO-IC-NEXT: br label [[FOR_END:%.*]] -; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] -; UNROLL-NO-IC: for.body: -; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i32 [ -20, [[SCALAR_PH:%.*]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] -; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = phi i32 [ [[A]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ] -; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16 -; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0 -; UNROLL-NO-IC-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]] -; UNROLL-NO-IC: for.cond: -; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]] -; UNROLL-NO-IC-NEXT: br label [[FOR_INC]] -; UNROLL-NO-IC: for.inc: -; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = sext i16 [[VAR4]] to i32 -; UNROLL-NO-IC-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]] -; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 -; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0 -; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]] +; UNROLL-NO-IC-NEXT: br label [[FOR_INC:%.*]] ; UNROLL-NO-IC: for.end: -; UNROLL-NO-IC-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: ret i32 [[VAR7]] +; UNROLL-NO-IC-NEXT: ret i32 [[TMP31]] ; ; INTERLEAVE-LABEL: @PR32419( ; INTERLEAVE-NEXT: entry: @@ -5818,23 +5700,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RECUR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]] -; CHECK-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32 -; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr i32, ptr [[DST]], i32 [[IV_TRUNC]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -5862,11 +5728,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; IND-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IND-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; IND: middle.block: -; IND-NEXT: br label [[EXIT:%.*]] -; IND: scalar.ph: ; IND-NEXT: br label [[LOOP:%.*]] -; IND: loop: -; IND-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; IND: exit: ; IND-NEXT: ret void ; @@ -5900,11 +5762,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: br label [[EXIT:%.*]] -; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[LOOP:%.*]] -; UNROLL: loop: -; UNROLL-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; UNROLL: exit: ; UNROLL-NEXT: ret void ; @@ -5937,23 +5795,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: br label [[EXIT:%.*]] -; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] -; UNROLL-NO-IC: loop: -; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[RECUR:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] -; UNROLL-NO-IC-NEXT: [[LV:%.*]] = load i32, ptr [[SRC]], align 4 -; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[RECUR]] -; UNROLL-NO-IC-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1 -; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; UNROLL-NO-IC-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32 -; UNROLL-NO-IC-NEXT: [[DST_GEP:%.*]] = getelementptr i32, ptr [[DST]], i32 [[IV_TRUNC]] -; UNROLL-NO-IC-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]] -; UNROLL-NO-IC-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4 -; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100 -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]] ; UNROLL-NO-IC: exit: ; UNROLL-NO-IC-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll index 9222af9..8975c05 100644 --- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll @@ -18,23 +18,9 @@ define i32 @one_direct_branch(ptr %src) { ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC_GEP]], align 4 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 25500, [[LV]] -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[PHI_XOR:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[XOR_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP5]] ; entry: br label %loop @@ -73,26 +59,9 @@ define i32 @two_direct_branch(ptr %src) { ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC_GEP]], align 4 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 25500, [[LV]] -; CHECK-NEXT: br label [[BB:%.*]] -; CHECK: bb: -; CHECK-NEXT: [[PHI_XOR_1:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ] -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[PHI_XOR:%.*]] = phi i32 [ [[PHI_XOR_1]], [[BB]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[XOR_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP5]] ; entry: br label %loop @@ -141,26 +110,9 @@ define i32 @cond_branch(i32 %a, ptr %src) { ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[SRC_GEP]], align 4 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 25500, [[LV]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_LATCH]], label [[THEN:%.*]] -; CHECK: then: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[PHI_XOR:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ], [ 10, [[THEN]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[XOR_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP7]] ; entry: br label %loop @@ -205,18 +157,9 @@ define i32 @optimizable_trunc_used_outside() { ; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[IV_TRUNC_LCSSA:%.*]] = phi i32 [ [[IV_TRUNC]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[IV_TRUNC_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP1]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll index 1128dd3..2c97bb7 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll @@ -33,19 +33,6 @@ define void @i65_induction_with_negative_step(ptr %dst) { ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_I65:%.*]] = phi i65 [ 0, %[[SCALAR_PH]] ], [ [[IV_I65_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[TRUNC:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[TRUNC]] = trunc i65 [[IV_I65]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TRUNC]] -; CHECK-NEXT: store i64 [[FOR]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: [[IV_I65_NEXT]] = add i65 [[IV_I65]], -1 -; CHECK-NEXT: br i1 [[ICMP]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll index 85e7477..eca9c1f 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll @@ -27,23 +27,6 @@ define void @gep_for_first_member_does_not_dominate_insert_point(ptr %str, ptr n ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i64 [[IV2]], 1 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[STR]], i64 [[OR_1]] -; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[GEP1]], align 1 -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr [[STR]], i64 [[IV2]] -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[GEP0]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i8 [[ADD]], ptr [[GEP_DST]], align 1 -; CHECK-NEXT: [[IV2_NEXT]] = add i64 [[IV2]], 2 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll index 4dc9cfd..bd0fd77 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll @@ -45,23 +45,6 @@ define void @merge_tbaa_interleave_group(ptr nocapture readonly %p, ptr noalias ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[IV]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[X]], align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP19]], 2.000000e+00 -; CHECK-NEXT: [[X4:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[IV]], i32 0 -; CHECK-NEXT: store double [[MUL]], ptr [[X4]], align 8, !tbaa [[TBAA10:![0-9]+]] -; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[IV]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = load double, ptr [[Y]], align 8, !tbaa [[TBAA5]] -; CHECK-NEXT: [[MUL7:%.*]] = fmul double [[TMP20]], 3.000000e+00 -; CHECK-NEXT: [[Y10:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[IV]], i32 1 -; CHECK-NEXT: store double [[MUL7]], ptr [[Y10]], align 8, !tbaa [[TBAA12:![0-9]+]] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -126,20 +109,20 @@ define void @ir_tbaa_different(ptr %base, ptr %end, ptr %src) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[SRC]], align 4, !alias.scope [[META13:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[SRC]], align 4, !alias.scope [[META10:![0-9]+]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META16:![0-9]+]], !noalias [[META13]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x float>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META13:![0-9]+]], !noalias [[META10]] ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> <i32 0, i32 2> ; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <4 x float> [[WIDE_VEC]], <4 x float> poison, <2 x i32> <i32 1, i32 3> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[STRIDED_VEC3]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3> -; CHECK-NEXT: store <4 x float> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: store <4 x float> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4, !alias.scope [[META13]], !noalias [[META10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -152,9 +135,9 @@ define void @ir_tbaa_different(ptr %base, ptr %end, ptr %src) { ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 8 ; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[PTR_IV]], align 4 ; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[L_1]], [[L_INVAR]] -; CHECK-NEXT: store float [[MUL_1]], ptr [[PTR_IV]], align 4, !tbaa [[TBAA10]] +; CHECK-NEXT: store float [[MUL_1]], ptr [[PTR_IV]], align 4, !tbaa [[TBAA16:![0-9]+]] ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR_IV]], i64 4 -; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_1]], align 4, !tbaa [[TBAA12]] +; CHECK-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_1]], align 4, !tbaa [[TBAA18:![0-9]+]] ; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[L_2]], [[L_INVAR]] ; CHECK-NEXT: store float [[MUL_2]], ptr [[GEP_1]], align 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] @@ -278,15 +261,15 @@ exit: ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} ; CHECK: [[META8]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META9]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META2]], i64 0} -; CHECK: [[META11]] = !{!"Vec2r", [[META2]], i64 0, [[META2]], i64 8} -; CHECK: [[TBAA12]] = !{[[META11]], [[META2]], i64 8} +; CHECK: [[META10]] = !{[[META11:![0-9]+]]} +; CHECK: [[META11]] = distinct !{[[META11]], [[META12:![0-9]+]]} +; CHECK: [[META12]] = distinct !{[[META12]], !"LVerDomain"} ; CHECK: [[META13]] = !{[[META14:![0-9]+]]} -; CHECK: [[META14]] = distinct !{[[META14]], [[META15:![0-9]+]]} -; CHECK: [[META15]] = distinct !{[[META15]], !"LVerDomain"} -; CHECK: [[META16]] = !{[[META17:![0-9]+]]} -; CHECK: [[META17]] = distinct !{[[META17]], [[META15]]} -; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META8]], [[META9]]} +; CHECK: [[META14]] = distinct !{[[META14]], [[META12]]} +; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META8]], [[META9]]} +; CHECK: [[TBAA16]] = !{[[META17:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META17]] = !{!"Vec2r", [[META2]], i64 0, [[META2]], i64 8} +; CHECK: [[TBAA18]] = !{[[META17]], [[META2]], i64 8} ; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META8]]} ; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META8]], [[META9]]} ; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META9]], [[META8]]} diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 4885dd2..b4cad11 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -47,11 +47,7 @@ define void @test_array_load2_store2(i32 %C, i32 %D) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -124,11 +120,7 @@ define void @test_struct_array_load3_store3() { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -206,11 +198,7 @@ define i32 @test_struct_load4(ptr nocapture readonly %S) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[SUB8_LCSSA:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[SUB8_LCSSA]] @@ -279,13 +267,9 @@ define void @test_struct_store4(ptr noalias nocapture readonly %A, ptr noalias n ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; entry: br label %for.body @@ -365,13 +349,9 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]] ; entry: br label %for.body @@ -619,11 +599,7 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture % ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_EXIT]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -681,13 +657,9 @@ define void @mixed_load2_store2(ptr noalias nocapture readonly %A, ptr noalias n ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]] ; entry: br label %for.body @@ -753,13 +725,9 @@ define void @mixed_load3_store3(ptr nocapture %A) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; entry: br label %for.body @@ -836,17 +804,13 @@ define void @int_float_struct(ptr nocapture readonly %A) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]]) ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]]) ; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr @SA, align 4 ; CHECK-NEXT: store float [[ADD3_LCSSA]], ptr @SB, align 4 ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/is_fpclass.ll b/llvm/test/Transforms/LoopVectorize/is_fpclass.ll index ab70c14..6c4ee5b7 100644 --- a/llvm/test/Transforms/LoopVectorize/is_fpclass.ll +++ b/llvm/test/Transforms/LoopVectorize/is_fpclass.ll @@ -20,19 +20,7 @@ define void @d() { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[I7:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[I3:%.*]] = load float, ptr null, align 4 -; CHECK-NEXT: [[I4:%.*]] = getelementptr float, ptr @d, i64 [[I]] -; CHECK-NEXT: [[I5:%.*]] = tail call i1 @llvm.is.fpclass.f32(float [[I3]], i32 0) -; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], float 0.000000e+00, float 1.000000e+00 -; CHECK-NEXT: store float [[I6]], ptr [[I4]], align 4 -; CHECK-NEXT: [[I7]] = add i64 [[I]], 1 -; CHECK-NEXT: [[I8:%.*]] = icmp eq i64 [[I7]], 128 -; CHECK-NEXT: br i1 [[I8]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index e662039..70b1ea1 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -31,21 +31,8 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], 9223372036854775807 ; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331 ; IC1VF4-NEXT: br label %[[EXIT:.*]] -; IC1VF4: [[SCALAR_PH:.*]]: -; IC1VF4-NEXT: br label %[[LOOP:.*]] -; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC1VF4: [[EXIT]]: -; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; IC1VF4-NEXT: ret i64 [[RDX_SELECT]] ; ; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start( ; IC4VF4-SAME: ptr [[A:%.*]]) { @@ -101,21 +88,8 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], 9223372036854775807 ; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331 ; IC4VF4-NEXT: br label %[[EXIT:.*]] -; IC4VF4: [[SCALAR_PH:.*]]: -; IC4VF4-NEXT: br label %[[LOOP:.*]] -; IC4VF4: [[LOOP]]: -; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC4VF4: [[EXIT]]: -; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; IC4VF4-NEXT: ret i64 [[RDX_SELECT]] ; ; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_const_start( ; IC4VF1-SAME: ptr [[A:%.*]]) { @@ -159,21 +133,8 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 9223372036854775807 ; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331 ; IC4VF1-NEXT: br label %[[EXIT:.*]] -; IC4VF1: [[SCALAR_PH:.*]]: -; IC4VF1-NEXT: br label %[[LOOP:.*]] -; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC4VF1: [[EXIT]]: -; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; IC4VF1-NEXT: ret i64 [[RDX_SELECT]] ; entry: br label %loop @@ -227,21 +188,8 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767 ; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0 ; IC1VF4-NEXT: br label %[[EXIT:.*]] -; IC1VF4: [[SCALAR_PH:.*]]: -; IC1VF4-NEXT: br label %[[LOOP:.*]] -; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] -; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1 -; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]] -; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 -; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] -; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 -; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC1VF4: [[EXIT]]: -; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC1VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; IC1VF4-NEXT: ret i16 [[RDX_SELECT]] ; ; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16( ; IC4VF4-SAME: i16 noundef [[VAL:%.*]]) { @@ -460,21 +408,8 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767 ; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0 ; IC4VF4-NEXT: br label %[[EXIT:.*]] -; IC4VF4: [[SCALAR_PH:.*]]: -; IC4VF4-NEXT: br label %[[LOOP:.*]] -; IC4VF4: [[LOOP]]: -; IC4VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] -; IC4VF4-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1 -; IC4VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]] -; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 -; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] -; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC4VF4: [[EXIT]]: -; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC4VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; IC4VF4-NEXT: ret i16 [[RDX_SELECT]] ; ; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_i16( ; IC4VF1-SAME: i16 noundef [[VAL:%.*]]) { @@ -523,21 +458,8 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767 ; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0 ; IC4VF1-NEXT: br label %[[EXIT:.*]] -; IC4VF1: [[SCALAR_PH:.*]]: -; IC4VF1-NEXT: br label %[[LOOP:.*]] -; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] -; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1 -; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]] -; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 -; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] -; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 -; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC4VF1: [[EXIT]]: -; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC4VF1-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; IC4VF1-NEXT: ret i16 [[RDX_SELECT]] ; entry: br label %loop @@ -592,21 +514,8 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767 ; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0 ; IC1VF4-NEXT: br label %[[EXIT:.*]] -; IC1VF4: [[SCALAR_PH:.*]]: -; IC1VF4-NEXT: br label %[[LOOP:.*]] -; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] -; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1 -; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]] -; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 -; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] -; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 -; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC1VF4: [[EXIT]]: -; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC1VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; IC1VF4-NEXT: ret i16 [[RDX_SELECT]] ; ; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half( ; IC4VF4-SAME: half noundef [[VAL:%.*]]) { @@ -825,21 +734,8 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767 ; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0 ; IC4VF4-NEXT: br label %[[EXIT:.*]] -; IC4VF4: [[SCALAR_PH:.*]]: -; IC4VF4-NEXT: br label %[[LOOP:.*]] -; IC4VF4: [[LOOP]]: -; IC4VF4-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] -; IC4VF4-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1 -; IC4VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]] -; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 -; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] -; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC4VF4: [[EXIT]]: -; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC4VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; IC4VF4-NEXT: ret i16 [[RDX_SELECT]] ; ; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_half( ; IC4VF1-SAME: half noundef [[VAL:%.*]]) { @@ -888,21 +784,8 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767 ; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0 ; IC4VF1-NEXT: br label %[[EXIT:.*]] -; IC4VF1: [[SCALAR_PH:.*]]: -; IC4VF1-NEXT: br label %[[LOOP:.*]] -; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]] -; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1 -; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]] -; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1 -; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]] -; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0 -; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC4VF1: [[EXIT]]: -; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC4VF1-NEXT: ret i16 [[SPEC_SELECT_LCSSA]] +; IC4VF1-NEXT: ret i16 [[RDX_SELECT]] ; entry: br label %loop @@ -954,21 +837,8 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -1 ; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331 ; IC1VF4-NEXT: br label %[[EXIT:.*]] -; IC1VF4: [[SCALAR_PH:.*]]: -; IC1VF4-NEXT: br label %[[LOOP:.*]] -; IC1VF4: [[LOOP]]: -; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC1VF4: [[EXIT]]: -; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; IC1VF4-NEXT: ret i64 [[RDX_SELECT]] ; ; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned( ; IC4VF4-SAME: ptr [[A:%.*]]) { @@ -1024,21 +894,8 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], -1 ; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331 ; IC4VF4-NEXT: br label %[[EXIT:.*]] -; IC4VF4: [[SCALAR_PH:.*]]: -; IC4VF4-NEXT: br label %[[LOOP:.*]] -; IC4VF4: [[LOOP]]: -; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC4VF4: [[EXIT]]: -; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; IC4VF4-NEXT: ret i64 [[RDX_SELECT]] ; ; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned( ; IC4VF1-SAME: ptr [[A:%.*]]) { @@ -1082,21 +939,8 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -1 ; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331 ; IC4VF1-NEXT: br label %[[EXIT:.*]] -; IC4VF1: [[SCALAR_PH:.*]]: -; IC4VF1-NEXT: br label %[[LOOP:.*]] -; IC4VF1: [[LOOP]]: -; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] -; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8 -; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3 -; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]] -; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0 -; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]] ; IC4VF1: [[EXIT]]: -; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; IC4VF1-NEXT: ret i64 [[RDX_SELECT]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index 0ace547..b991d58 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -261,22 +261,8 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648 ; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331 ; CHECK-VF4IC1-NEXT: br label %[[EXIT:.*]] -; CHECK-VF4IC1: [[SCALAR_PH:.*]]: -; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-VF4IC1: [[FOR_BODY]]: -; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3 -; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]] -; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000 -; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK-VF4IC1: [[EXIT]]: -; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK-VF4IC1-NEXT: ret i32 [[RDX_SELECT]] ; ; CHECK-VF4IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { @@ -322,22 +308,8 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648 ; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 331 ; CHECK-VF4IC4-NEXT: br label %[[EXIT:.*]] -; CHECK-VF4IC4: [[SCALAR_PH:.*]]: -; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-VF4IC4: [[FOR_BODY]]: -; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP16]], 3 -; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]] -; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000 -; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK-VF4IC4: [[EXIT]]: -; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK-VF4IC4-NEXT: ret i32 [[RDX_SELECT]] ; ; CHECK-VF1IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { @@ -384,22 +356,8 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648 ; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331 ; CHECK-VF1IC4-NEXT: br label %[[EXIT:.*]] -; CHECK-VF1IC4: [[SCALAR_PH:.*]]: -; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-VF1IC4: [[FOR_BODY]]: -; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3 -; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]] -; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000 -; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK-VF1IC4: [[EXIT]]: -; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK-VF1IC4-NEXT: ret i32 [[RDX_SELECT]] ; entry: br label %for.body @@ -446,22 +404,8 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648 ; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 -1 ; CHECK-VF4IC1-NEXT: br label %[[EXIT:.*]] -; CHECK-VF4IC1: [[SCALAR_PH:.*]]: -; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-VF4IC1: [[FOR_BODY]]: -; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP7]], 0.000000e+00 -; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]] -; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648 -; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK-VF4IC1: [[EXIT]]: -; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK-VF4IC1-NEXT: ret i32 [[RDX_SELECT]] ; ; CHECK-VF4IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub( ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { @@ -507,22 +451,8 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648 ; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 -1 ; CHECK-VF4IC4-NEXT: br label %[[EXIT:.*]] -; CHECK-VF4IC4: [[SCALAR_PH:.*]]: -; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-VF4IC4: [[FOR_BODY]]: -; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP16]], 0.000000e+00 -; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]] -; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648 -; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK-VF4IC4: [[EXIT]]: -; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK-VF4IC4-NEXT: ret i32 [[RDX_SELECT]] ; ; CHECK-VF1IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub( ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { @@ -569,22 +499,8 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648 ; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 -1 ; CHECK-VF1IC4-NEXT: br label %[[EXIT:.*]] -; CHECK-VF1IC4: [[SCALAR_PH:.*]]: -; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-VF1IC4: [[FOR_BODY]]: -; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP26]], 0.000000e+00 -; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]] -; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648 -; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK-VF1IC4: [[EXIT]]: -; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK-VF1IC4-NEXT: ret i32 [[RDX_SELECT]] ; entry: br label %for.body @@ -636,22 +552,8 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP5]], 0 ; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP5]], i32 331 ; CHECK-VF4IC1-NEXT: br label %[[EXIT:.*]] -; CHECK-VF4IC1: [[SCALAR_PH:.*]]: -; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-VF4IC1: [[FOR_BODY]]: -; CHECK-VF4IC1-NEXT: [[IV1:%.*]] = phi i64 [ 2147483646, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV1]] -; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 -; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 -; CHECK-VF4IC1-NEXT: [[CONV:%.*]] = trunc i64 [[IV1]] to i32 -; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] -; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV1]], 1 -; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 -; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK-VF4IC1: [[EXIT]]: -; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK-VF4IC1-NEXT: ret i32 [[RDX_SELECT]] ; ; CHECK-VF4IC4-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range( ; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { @@ -698,22 +600,8 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP14]], 0 ; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP14]], i32 331 ; CHECK-VF4IC4-NEXT: br label %[[EXIT:.*]] -; CHECK-VF4IC4: [[SCALAR_PH:.*]]: -; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-VF4IC4: [[FOR_BODY]]: -; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 3 -; CHECK-VF4IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] -; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 -; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK-VF4IC4: [[EXIT]]: -; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK-VF4IC4-NEXT: ret i32 [[RDX_SELECT]] ; ; CHECK-VF1IC4-LABEL: define i32 @select_icmp_truncated_unsigned_iv_range( ; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { @@ -762,22 +650,8 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX6]], 0 ; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX6]], i32 331 ; CHECK-VF1IC4-NEXT: br label %[[EXIT:.*]] -; CHECK-VF1IC4: [[SCALAR_PH:.*]]: -; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-VF1IC4: [[FOR_BODY]]: -; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 3 -; CHECK-VF1IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] -; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 -; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK-VF1IC4: [[EXIT]]: -; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] -; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK-VF1IC4-NEXT: ret i32 [[RDX_SELECT]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 3f91baa..86515eb 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -102,16 +102,8 @@ define i32 @constpre() { ; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INC_PHI:%.*]] = phi i32 [ 32, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[INC]] = sub nsw i32 [[INC_PHI]], 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 0 -; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[INC_PHI_LCSSA:%.*]] = phi i32 [ [[INC_PHI]], %[[FOR_BODY]] ], [ 2, %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[INC_PHI_LCSSA]] +; CHECK-NEXT: ret i32 2 ; entry: br label %for.body @@ -142,18 +134,8 @@ define ptr @geppre(ptr %ptr) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -16 ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INC_PHI:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[PTR_PHI:%.*]] = phi ptr [ [[PTR]], %[[SCALAR_PH]] ], [ [[INC_PTR:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[INC]] = add nsw i32 [[INC_PHI]], 1 -; CHECK-NEXT: [[INC_PTR]] = getelementptr i32, ptr [[PTR_PHI]], i32 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 32 -; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[PTR_PHI_LCSSA:%.*]] = phi ptr [ [[PTR_PHI]], %[[FOR_BODY]] ], [ [[IND_ESCAPE]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret ptr [[PTR_PHI_LCSSA]] +; CHECK-NEXT: ret ptr [[IND_ESCAPE]] ; entry: br label %for.body @@ -411,18 +393,8 @@ define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br label %[[EXIT:.*]] -; VEC: [[SCALAR_PH:.*]]: -; VEC-NEXT: br label %[[LOOP:.*]] -; VEC: [[LOOP]]: -; VEC-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 -; VEC-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]] -; VEC-NEXT: store i64 [[IV]], ptr [[GEP_PTR]], align 4 -; VEC-NEXT: [[EXITCOND:%.*]] = icmp ugt i64 [[IV]], 1000 -; VEC-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; VEC: [[EXIT]]: -; VEC-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ], [ 1001, %[[MIDDLE_BLOCK]] ] -; VEC-NEXT: ret i64 [[IV_LCSSA]] +; VEC-NEXT: ret i64 1001 ; ; INTERLEAVE-LABEL: define i64 @iv_scalar_steps_and_outside_users( ; INTERLEAVE-SAME: ptr [[PTR:%.*]]) { @@ -442,18 +414,8 @@ define i64 @iv_scalar_steps_and_outside_users(ptr %ptr) { ; INTERLEAVE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] -; INTERLEAVE: [[SCALAR_PH:.*]]: -; INTERLEAVE-NEXT: br label %[[LOOP:.*]] -; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 -; INTERLEAVE-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]] -; INTERLEAVE-NEXT: store i64 [[IV]], ptr [[GEP_PTR]], align 4 -; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp ugt i64 [[IV]], 1000 -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; INTERLEAVE: [[EXIT]]: -; INTERLEAVE-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ], [ 1001, %[[MIDDLE_BLOCK]] ] -; INTERLEAVE-NEXT: ret i64 [[IV_LCSSA]] +; INTERLEAVE-NEXT: ret i64 1001 ; entry: br label %loop @@ -491,20 +453,8 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br label %[[EXIT:.*]] -; VEC: [[SCALAR_PH:.*]]: -; VEC-NEXT: br label %[[LOOP:.*]] -; VEC: [[LOOP]]: -; VEC-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 -; VEC-NEXT: [[IV_2_NEXT]] = add nuw i32 [[IV_2]], 2 -; VEC-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]] -; VEC-NEXT: store i64 [[IV]], ptr [[GEP_PTR]], align 4 -; VEC-NEXT: [[EXITCOND:%.*]] = icmp ugt i64 [[IV]], 1000 -; VEC-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; VEC: [[EXIT]]: -; VEC-NEXT: [[IV_2_LCSSA:%.*]] = phi i32 [ [[IV_2]], %[[LOOP]] ], [ 2002, %[[MIDDLE_BLOCK]] ] -; VEC-NEXT: ret i32 [[IV_2_LCSSA]] +; VEC-NEXT: ret i32 2002 ; ; INTERLEAVE-LABEL: define i32 @iv_2_dead_in_loop_only_used_outside( ; INTERLEAVE-SAME: ptr [[PTR:%.*]]) { @@ -524,20 +474,8 @@ define i32 @iv_2_dead_in_loop_only_used_outside(ptr %ptr) { ; INTERLEAVE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] -; INTERLEAVE: [[SCALAR_PH:.*]]: -; INTERLEAVE-NEXT: br label %[[LOOP:.*]] -; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[IV_2:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1 -; INTERLEAVE-NEXT: [[IV_2_NEXT]] = add nuw i32 [[IV_2]], 2 -; INTERLEAVE-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[IV]] -; INTERLEAVE-NEXT: store i64 [[IV]], ptr [[GEP_PTR]], align 4 -; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp ugt i64 [[IV]], 1000 -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; INTERLEAVE: [[EXIT]]: -; INTERLEAVE-NEXT: [[IV_2_LCSSA:%.*]] = phi i32 [ [[IV_2]], %[[LOOP]] ], [ 2002, %[[MIDDLE_BLOCK]] ] -; INTERLEAVE-NEXT: ret i32 [[IV_2_LCSSA]] +; INTERLEAVE-NEXT: ret i32 2002 ; entry: br label %loop @@ -1092,18 +1030,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br label %[[E_EXIT:.*]] -; VEC: [[SCALAR_PH:.*]]: -; VEC-NEXT: br label %[[LOOP:.*]] -; VEC: [[LOOP]]: -; VEC-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] -; VEC-NEXT: store i16 0, ptr [[GEP_DST]], align 2 -; VEC-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[IV]] -; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 -; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]] ; VEC: [[E_EXIT]]: -; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] -; VEC-NEXT: ret i32 [[RES]] +; VEC-NEXT: ret i32 [[TMP5]] ; ; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification( ; INTERLEAVE-SAME: ptr [[DST:%.*]]) { @@ -1126,18 +1054,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; INTERLEAVE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[E_EXIT:.*]] -; INTERLEAVE: [[SCALAR_PH:.*]]: -; INTERLEAVE-NEXT: br label %[[LOOP:.*]] -; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] -; INTERLEAVE-NEXT: store i16 0, ptr [[GEP_DST]], align 2 -; INTERLEAVE-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[IV]] -; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 -; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]] ; INTERLEAVE: [[E_EXIT]]: -; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] -; INTERLEAVE-NEXT: ret i32 [[RES]] +; INTERLEAVE-NEXT: ret i32 [[TMP5]] ; entry: %step.1 = sext i8 0 to i32 @@ -1187,19 +1105,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 ; VEC-NEXT: br label %[[E_EXIT:.*]] -; VEC: [[SCALAR_PH:.*]]: -; VEC-NEXT: br label %[[LOOP:.*]] -; VEC: [[LOOP]]: -; VEC-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] -; VEC-NEXT: store i16 0, ptr [[GEP_DST]], align 2 -; VEC-NEXT: [[INC:%.*]] = add i32 [[IV]], 1 -; VEC-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[INC]] -; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 -; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]] ; VEC: [[E_EXIT]]: -; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] -; VEC-NEXT: ret i32 [[RES]] +; VEC-NEXT: ret i32 [[TMP7]] ; ; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2( ; INTERLEAVE-SAME: ptr [[DST:%.*]]) { @@ -1224,19 +1131,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[E_EXIT:.*]] -; INTERLEAVE: [[SCALAR_PH:.*]]: -; INTERLEAVE-NEXT: br label %[[LOOP:.*]] -; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[IV]] -; INTERLEAVE-NEXT: store i16 0, ptr [[GEP_DST]], align 2 -; INTERLEAVE-NEXT: [[INC:%.*]] = add i32 [[IV]], 1 -; INTERLEAVE-NEXT: [[IV_NEXT]] = add i32 [[STEP_2]], [[INC]] -; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 -; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]] ; INTERLEAVE: [[E_EXIT]]: -; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] -; INTERLEAVE-NEXT: ret i32 [[RES]] +; INTERLEAVE-NEXT: ret i32 [[TMP5]] ; entry: %step.1 = sext i8 0 to i32 @@ -1356,24 +1252,12 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1 ; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2 ; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1 -; VEC-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1 +; VEC-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP5]], 1 ; VEC-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br label %[[EXIT:.*]] -; VEC: [[SCALAR_PH:.*]]: -; VEC-NEXT: br label %[[LOOP:.*]] -; VEC: [[LOOP]]: -; VEC-NEXT: [[IV_1:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[IV_2:%.*]] = phi i64 [ 2, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] -; VEC-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV_1]] -; VEC-NEXT: store i16 1, ptr [[GEP]], align 2 -; VEC-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1 -; VEC-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_2_NEXT]], 0 -; VEC-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1 -; VEC-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; VEC: [[EXIT]]: -; VEC-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; VEC-NEXT: ret i64 [[IV_1_NEXT_LCSSA]] +; VEC-NEXT: ret i64 [[IV_1_NEXT_LCSSA1]] ; ; INTERLEAVE-LABEL: define i64 @test_iv_increment_incremented( ; INTERLEAVE-SAME: ptr [[DST:%.*]]) { @@ -1387,24 +1271,12 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; INTERLEAVE-NEXT: store i16 1, ptr [[TMP0]], align 2 ; INTERLEAVE-NEXT: store i16 1, ptr [[TMP1]], align 2 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 1, -1 -; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 1 +; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP2]], 1 ; INTERLEAVE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] -; INTERLEAVE: [[SCALAR_PH:.*]]: -; INTERLEAVE-NEXT: br label %[[LOOP:.*]] -; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV_1:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[IV_2:%.*]] = phi i64 [ 2, %[[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV_1]] -; INTERLEAVE-NEXT: store i16 1, ptr [[GEP]], align 2 -; INTERLEAVE-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1 -; INTERLEAVE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_2_NEXT]], 0 -; INTERLEAVE-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1 -; INTERLEAVE-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; INTERLEAVE: [[EXIT]]: -; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ] -; INTERLEAVE-NEXT: ret i64 [[IV_1_NEXT_LCSSA]] +; INTERLEAVE-NEXT: ret i64 [[IV_1_NEXT_LCSSA1]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 11d48df..9358fd9 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -48,29 +48,9 @@ define i16 @test_access_size_not_multiple_of_align(i64 %len, ptr %test_base) { ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP17:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP15]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[L_T]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i16 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i16 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp eq i64 [[IV]], 4095 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i16 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i16 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i16 [[TMP17]] ; entry: %alloca = alloca [163840 x i16], align 4 @@ -142,29 +122,9 @@ define i32 @test_access_size_multiple_of_align_but_offset_by_1(i64 %len, ptr %te ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP15]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[L_T]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i32, ptr [[START]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp eq i64 [[IV]], 4095 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP17]] ; entry: %alloca = alloca [163840 x i32], align 4 @@ -370,26 +330,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1023, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[IV]] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP19]], 3 -; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[IV]] -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP20]], 2 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[IV]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false) ; CHECK-NEXT: ret void @@ -481,27 +422,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1023, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[OFF:%.*]] = add i64 [[IV]], -1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFF]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP22]], 3 -; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[OFF]] -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP23]], 2 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[OFF]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false) ; CHECK-NEXT: ret void @@ -574,30 +495,9 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP13]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE]], i64 [[IV]] -; CHECK-NEXT: [[L_T:%.*]] = load i8, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[L_T]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[IV_STRIDE:%.*]] = mul i64 [[IV]], 2 -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[IV_STRIDE]] -; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[ADDR]], align 2 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i16 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i16 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp eq i64 [[IV]], 4095 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i16 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i16 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i16 [[TMP15]] ; entry: %alloca = alloca [163840 x i16], align 4 @@ -681,27 +581,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 511, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[IV]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp eq i32 [[TMP21]], 3 -; CHECK-NEXT: br i1 [[CMP3_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[IV_STRIDED:%.*]] = mul i64 [[IV]], 2 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[IV_STRIDED]] -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP22]], 2 -; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[IV]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX7]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll index b224534..b14a1cd 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll @@ -52,28 +52,9 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) { ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP20:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[TMP18]]) -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ -1000, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[ACCUM:%.*]] = phi i8 [ 0, [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: [[TEST_ADDR:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[IV]] -; CHECK-NEXT: [[EARLYCND:%.*]] = load i1, ptr [[TEST_ADDR]], align 1 -; CHECK-NEXT: br i1 [[EARLYCND]], label [[PRED:%.*]], label [[LATCH]] -; CHECK: pred: -; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[ADDR]], align 1 -; CHECK-NEXT: br label [[LATCH]] -; CHECK: latch: -; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i8 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] -; CHECK-NEXT: [[ACCUM_NEXT]] = add i8 [[ACCUM]], [[VAL_PHI]] -; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i16 [[IV]], -990 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i8 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i8 [[ACCUM_NEXT_LCSSA]] +; CHECK-NEXT: ret i8 [[TMP20]] ; entry: %alloca = alloca [64638 x i8] diff --git a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll index f44fc4e..096a0a8 100644 --- a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll @@ -30,28 +30,6 @@ define void @accesses_to_struct_dereferenceable(ptr noalias %dst) { ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i32 [[D]], 0 -; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[IV]] -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: if.else: -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[TMP_0:%.*]] = phi i32 [ [[L_A]], [[IF_THEN]] ], [ [[L_B]], [[IF_ELSE]] ] -; CHECK-NEXT: store i32 [[TMP_0]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 32000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_HEADER]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -265,29 +243,6 @@ define void @accesses_to_struct_may_not_be_dereferenceable_access_size(ptr noali ; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i32 [[D]], 0 -; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[IV]] -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: if.else: -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i64, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[T:%.*]] = trunc i64 [[L_B]] to i32 -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[TMP_0:%.*]] = phi i32 [ [[L_A]], [[IF_THEN]] ], [ [[T]], [[IF_ELSE]] ] -; CHECK-NEXT: store i32 [[TMP_0]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 32000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_HEADER]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index c589c77..aed1e29 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -79,17 +79,7 @@ define void @bottom_tested(ptr %p, i32 %n) { ; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TAILFOLD: middle.block: -; TAILFOLD-NEXT: br label [[IF_END:%.*]] -; TAILFOLD: scalar.ph: ; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] -; TAILFOLD: for.cond: -; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] -; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 -; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]] -; TAILFOLD-NEXT: store i16 0, ptr [[B]], align 4 -; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 -; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] -; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]] ; TAILFOLD: if.end: ; TAILFOLD-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll index 781980d..1fe802f 100644 --- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll +++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll @@ -32,17 +32,6 @@ define void @scalar_loop_dead(ptr noundef captures(none) %a, float noundef %x) { ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X]], [[LOAD]] -; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[COMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -83,7 +72,7 @@ define void @scalar_loop_live(ptr noundef captures(none) %a, float noundef %x, i ; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -98,7 +87,7 @@ define void @scalar_loop_live(ptr noundef captures(none) %a, float noundef %x, i ; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[COMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -128,9 +117,6 @@ exit: ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.count", i32 8} ; CHECK: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} -; CHECK: [[META5]] = !{!"llvm.loop.vectorize.enable", i1 true} -; CHECK: [[META6]] = !{!"llvm.loop.vectorize.followup_all", [[META1]], [[META2]]} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]], [[META3]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]], [[META3]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll index bb51992..30ee480 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll @@ -69,19 +69,7 @@ define void @maxvf3() { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[J_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[AJ:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[J]] -; CHECK-NEXT: store i8 69, ptr [[AJ]], align 8 -; CHECK-NEXT: [[JP3:%.*]] = add nuw nsw i32 3, [[J]] -; CHECK-NEXT: [[AJP3:%.*]] = getelementptr inbounds [18 x i8], ptr @a, i32 0, i32 [[JP3]] -; CHECK-NEXT: store i8 7, ptr [[AJP3]], align 8 -; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[J_NEXT]], 15 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll index e2dadff..3c59a27 100644 --- a/llvm/test/Transforms/LoopVectorize/metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/metadata.ll @@ -142,18 +142,6 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[CHAR_TBAA0]], !range [[RNG9:![0-9]+]] -; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]], !range [[RNG9]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -180,18 +168,6 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] -; INTERLEAVE: [[SCALAR_PH:.*]]: -; INTERLEAVE-NEXT: br label %[[LOOP:.*]] -; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4, !tbaa [[CHAR_TBAA0]], !range [[RNG9:![0-9]+]] -; INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR1:[0-9]+]], !range [[RNG9]] -; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; INTERLEAVE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 -; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; INTERLEAVE: [[EXIT]]: ; INTERLEAVE-NEXT: ret void ; @@ -229,21 +205,9 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]] -; CHECK-NEXT: [[CALL:%.*]] = call double @bar(double [[LOAD]]) #[[ATTR2:[0-9]+]], !fpmath [[META3]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -267,21 +231,9 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE-NEXT: store <2 x double> [[TMP4]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] -; INTERLEAVE: [[SCALAR_PH:.*]]: -; INTERLEAVE-NEXT: br label %[[LOOP:.*]] -; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] -; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[CALL:%.*]] = call double @bar(double [[LOAD]]) #[[ATTR2:[0-9]+]], !fpmath [[META3]] -; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] -; INTERLEAVE-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8 -; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; INTERLEAVE: [[EXIT]]: ; INTERLEAVE-NEXT: ret void ; @@ -319,21 +271,9 @@ define void @widen_intrinsic(ptr noalias %a, ptr readonly %b) { ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 -; CHECK-NEXT: [[CALL:%.*]] = call i64 @llvm.abs.i64(i64 [[LOAD]], i1 true), !range [[RNG9]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -357,21 +297,9 @@ define void @widen_intrinsic(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP7]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] -; INTERLEAVE: [[SCALAR_PH:.*]]: -; INTERLEAVE-NEXT: br label %[[LOOP:.*]] -; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[IV]] -; INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 4 -; INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @llvm.abs.i64(i64 [[LOAD]], i1 true), !range [[RNG9]] -; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; INTERLEAVE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 -; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; INTERLEAVE: [[EXIT]]: ; INTERLEAVE-NEXT: ret void ; @@ -409,21 +337,9 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]] -; CHECK-NEXT: [[CALL:%.*]] = call double @llvm.sin.f64(double [[LOAD]]) #[[ATTR2]], !fpmath [[META3]] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -447,21 +363,9 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE-NEXT: store <2 x double> [[TMP4]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] -; INTERLEAVE: [[SCALAR_PH:.*]]: -; INTERLEAVE-NEXT: br label %[[LOOP:.*]] -; INTERLEAVE: [[LOOP]]: -; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[B]], i64 [[IV]] -; INTERLEAVE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[CALL:%.*]] = call double @llvm.sin.f64(double [[LOAD]]) #[[ATTR2]], !fpmath [[META3]] -; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] -; INTERLEAVE-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 8 -; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; INTERLEAVE: [[EXIT]]: ; INTERLEAVE-NEXT: ret void ; @@ -506,7 +410,7 @@ define void @unknown_metadata(ptr nocapture %a, ptr noalias %b, i64 %size) { ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i32> [[TMP3]], splat (i32 2) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SIZE]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -522,7 +426,7 @@ define void @unknown_metadata(ptr nocapture %a, ptr noalias %b, i64 %size) { ; CHECK-NEXT: store ptr [[ARRAYIDX_2]], ptr [[ARRAYIDX_1]], align 8, !custom_md [[META2]] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1, !custom_md [[META2]] ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[SIZE]], !custom_md [[META2]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP14:![0-9]+]], !custom_md [[META2]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP13:![0-9]+]], !custom_md [[META2]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -555,7 +459,7 @@ define void @unknown_metadata(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; INTERLEAVE-NEXT: [[VEC_IND_NEXT2]] = add <2 x i32> [[STEP_ADD3]], splat (i32 2) ; INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; INTERLEAVE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SIZE]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -571,7 +475,7 @@ define void @unknown_metadata(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: store ptr [[ARRAYIDX_2]], ptr [[ARRAYIDX_1]], align 8, !custom_md [[META2]] ; INTERLEAVE-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1, !custom_md [[META2]] ; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[SIZE]], !custom_md [[META2]] -; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP14:![0-9]+]], !custom_md [[META2]] +; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP13:![0-9]+]], !custom_md [[META2]] ; INTERLEAVE: [[EXIT]]: ; INTERLEAVE-NEXT: ret void ; @@ -617,12 +521,11 @@ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_bar(bar_ ; CHECK: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META6]], [[META5]]} ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]], [[META6]]} -; CHECK: [[RNG9]] = !{i64 0, i64 2} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]], [[META6]]} ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]]} ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]], [[META6]]} -; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]], [[META6]]} -; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META6]], [[META5]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META5]]} ;. ; INTERLEAVE: [[CHAR_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0, i64 0} ; INTERLEAVE: [[META1]] = !{!"omnipotent char", [[META2]]} @@ -633,10 +536,9 @@ attributes #1 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2v_bar(bar_ ; INTERLEAVE: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} ; INTERLEAVE: [[LOOP7]] = distinct !{[[LOOP7]], [[META6]], [[META5]]} ; INTERLEAVE: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]], [[META6]]} -; INTERLEAVE: [[RNG9]] = !{i64 0, i64 2} +; INTERLEAVE: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]], [[META6]]} ; INTERLEAVE: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} ; INTERLEAVE: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]]} ; INTERLEAVE: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]], [[META6]]} -; INTERLEAVE: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]], [[META6]]} -; INTERLEAVE: [[LOOP14]] = distinct !{[[LOOP14]], [[META6]], [[META5]]} +; INTERLEAVE: [[LOOP13]] = distinct !{[[LOOP13]], [[META6]], [[META5]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll index 7866728..47a2a84 100644 --- a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll @@ -26,20 +26,8 @@ define float @maximumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> [[RDX_MINMAX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[RED_NEXT]] = tail call float @llvm.maximumnum.f32(float [[RED]], float [[L]]) -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV1]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[INC]], 1024 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RED_NEXT_LCSSA]] +; CHECK-NEXT: ret float [[TMP6]] ; entry: br label %loop @@ -82,20 +70,8 @@ define float @maximumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call fast <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> [[RDX_MINMAX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[RED_NEXT]] = tail call fast float @llvm.maximumnum.f32(float [[RED]], float [[L]]) -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV1]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[INC]], 1024 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RED_NEXT_LCSSA]] +; CHECK-NEXT: ret float [[TMP6]] ; entry: br label %loop @@ -138,20 +114,8 @@ define float @minimumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> [[RDX_MINMAX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[RED_NEXT]] = tail call float @llvm.minimumnum.f32(float [[RED]], float [[L]]) -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV1]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[INC]], 1024 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RED_NEXT_LCSSA]] +; CHECK-NEXT: ret float [[TMP6]] ; entry: br label %loop @@ -194,20 +158,8 @@ define float @minimumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call fast <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> [[RDX_MINMAX]]) ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi float [ 0.000000e+00, %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV1]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[RED_NEXT]] = tail call fast float @llvm.minimumnum.f32(float [[RED]], float [[L]]) -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV1]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[INC]], 1024 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi float [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RED_NEXT_LCSSA]] +; CHECK-NEXT: ret float [[TMP6]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll index 2e88ff6..a1fc1b8 100644 --- a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll +++ b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll @@ -34,10 +34,6 @@ define i32 @main() #0 { ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll index d928a4b..b19f9c5 100644 --- a/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/multiple-result-intrinsics.ll @@ -12,14 +12,7 @@ define void @sincos_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 ; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 ; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 ; CHECK: [[EXIT:.*:]] ; entry: @@ -55,14 +48,7 @@ define void @sincos_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noali ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 ; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 ; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincos.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 ; CHECK: [[EXIT:.*:]] ; entry: @@ -91,9 +77,9 @@ define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly ; CHECK-LABEL: define void @predicated_sincos( ; CHECK-SAME: float [[X:%.*]], ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) { ; CHECK: [[ENTRY:.*:]] -; CHECK: [[VECTOR_BODY1:.*]]: -; CHECK: [[VECTOR_BODY:.*:]] -; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY1]] ], [ [[INDEX_NEXT:%.*]], %[[IF_THEN2:.*]] ] +; CHECK: [[VECTOR_BODY:.*]]: +; CHECK: [[VECTOR_BODY1:.*:]] +; CHECK: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[IF_THEN1:.*]] ] ; CHECK: [[TMP4:%.*]] = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> [[WIDE_LOAD:%.*]]) ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 0 ; CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 1 @@ -107,23 +93,14 @@ define void @predicated_sincos(float %x, ptr noalias %in, ptr noalias writeonly ; CHECK: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 -; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[IF_THEN2]] +; CHECK: br i1 [[TMP12]], label %[[PRED_STORE_IF1:.*]], label %[[IF_THEN1]] ; CHECK: [[PRED_STORE_IF1]]: ; CHECK: [[TMP15:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 ; CHECK: store float [[TMP15]], ptr [[TMP14:%.*]], align 4 ; CHECK: [[TMP17:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 ; CHECK: store float [[TMP17]], ptr [[TMP16:%.*]], align 4 -; CHECK: br label %[[IF_THEN2]] -; CHECK: [[IF_THEN2]]: -; CHECK: [[IF_THEN:.*:]] -; CHECK: [[IF_THEN3:.*:]] -; CHECK: [[IF_THEN4:.*:]] -; CHECK: [[IF_THEN1:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincos.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 +; CHECK: br label %[[IF_THEN1]] +; CHECK: [[IF_THEN1]]: ; CHECK: [[IF_MERGE:.*:]] ; CHECK: [[FOR_END:.*:]] ; @@ -167,14 +144,7 @@ define void @modf_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 ; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 ; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.modf.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 ; CHECK: [[EXIT:.*:]] ; entry: @@ -210,14 +180,7 @@ define void @modf_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 ; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 ; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.modf.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 ; CHECK: [[EXIT:.*:]] ; entry: @@ -253,14 +216,7 @@ define void @sincospi_f32(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 ; CHECK: store <2 x float> [[TMP4]], ptr [[TMP7:%.*]], align 4 ; CHECK: store <2 x float> [[TMP5]], ptr [[TMP9:%.*]], align 4 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { float, float } @llvm.sincospi.f32(float [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { float, float } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { float, float } [[CALL]], 1 -; CHECK: store float [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 4 -; CHECK: store float [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 4 ; CHECK: [[EXIT:.*:]] ; entry: @@ -296,14 +252,7 @@ define void @sincospi_f64(ptr noalias %in, ptr noalias writeonly %out_a, ptr noa ; CHECK: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP3]], 1 ; CHECK: store <2 x double> [[TMP4]], ptr [[TMP7:%.*]], align 8 ; CHECK: store <2 x double> [[TMP5]], ptr [[TMP9:%.*]], align 8 -; CHECK: [[MIDDLE_BLOCK:.*:]] -; CHECK: [[SCALAR_PH:.*:]] ; CHECK: [[FOR_BODY:.*:]] -; CHECK: [[CALL:%.*]] = tail call { double, double } @llvm.sincospi.f64(double [[IN_VAL:%.*]]) -; CHECK: [[EXTRACT_A:%.*]] = extractvalue { double, double } [[CALL]], 0 -; CHECK: [[EXTRACT_B:%.*]] = extractvalue { double, double } [[CALL]], 1 -; CHECK: store double [[EXTRACT_A]], ptr [[ARRAYIDX2:%.*]], align 8 -; CHECK: store double [[EXTRACT_B]], ptr [[ARRAYIDX4:%.*]], align 8 ; CHECK: [[EXIT:.*:]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll index 9b6774e..481fa04 100644 --- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll +++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll @@ -26,20 +26,6 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt float [[TMP7]], 1.000000e+02 -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0]]) -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP7]], 1.000000e+00 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1599 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index 819cfaa..9f82795 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -273,19 +273,8 @@ define void @pr43371() optsize { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY29:.*]] ; CHECK: [[FOR_COND_CLEANUP28]]: ; CHECK-NEXT: unreachable -; CHECK: [[FOR_BODY29]]: -; CHECK-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] -; CHECK-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] -; CHECK-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 -; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] -; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 -; CHECK-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 -; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 -; CHECK-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]] ; ; PGSO-LABEL: define void @pr43371( ; PGSO-SAME: ) #[[ATTR0]] { @@ -310,19 +299,8 @@ define void @pr43371() optsize { ; PGSO-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; PGSO: [[MIDDLE_BLOCK]]: ; PGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] -; PGSO: [[SCALAR_PH:.*]]: -; PGSO-NEXT: br label %[[FOR_BODY29:.*]] ; PGSO: [[FOR_COND_CLEANUP28]]: ; PGSO-NEXT: unreachable -; PGSO: [[FOR_BODY29]]: -; PGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] -; PGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] -; PGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 -; PGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] -; PGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 -; PGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 -; PGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 -; PGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]] ; ; NPGSO-LABEL: define void @pr43371( ; NPGSO-SAME: ) #[[ATTR0]] { @@ -347,19 +325,8 @@ define void @pr43371() optsize { ; NPGSO-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] -; NPGSO: [[SCALAR_PH:.*]]: -; NPGSO-NEXT: br label %[[FOR_BODY29:.*]] ; NPGSO: [[FOR_COND_CLEANUP28]]: ; NPGSO-NEXT: unreachable -; NPGSO: [[FOR_BODY29]]: -; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] -; NPGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] -; NPGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 -; NPGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] -; NPGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 -; NPGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 -; NPGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 -; NPGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]] ; ; We do not want to generate SCEV predicates when optimising for size, because ; that will lead to extra code generation such as the SCEV overflow runtime @@ -407,19 +374,8 @@ define void @pr43371_pgso() !prof !14 { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY29:.*]] ; CHECK: [[FOR_COND_CLEANUP28]]: ; CHECK-NEXT: unreachable -; CHECK: [[FOR_BODY29]]: -; CHECK-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] -; CHECK-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] -; CHECK-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 -; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] -; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 -; CHECK-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 -; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 -; CHECK-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]] ; ; PGSO-LABEL: define void @pr43371_pgso( ; PGSO-SAME: ) !prof [[PROF14]] { @@ -444,19 +400,8 @@ define void @pr43371_pgso() !prof !14 { ; PGSO-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; PGSO: [[MIDDLE_BLOCK]]: ; PGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] -; PGSO: [[SCALAR_PH:.*]]: -; PGSO-NEXT: br label %[[FOR_BODY29:.*]] ; PGSO: [[FOR_COND_CLEANUP28]]: ; PGSO-NEXT: unreachable -; PGSO: [[FOR_BODY29]]: -; PGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] -; PGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] -; PGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 -; PGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] -; PGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 -; PGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 -; PGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 -; PGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]] ; ; NPGSO-LABEL: define void @pr43371_pgso( ; NPGSO-SAME: ) !prof [[PROF14]] { @@ -686,16 +631,6 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]] -; CHECK-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]] -; CHECK-NEXT: store i16 42, ptr [[GEPOFB]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; @@ -734,16 +669,6 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; PGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; PGSO: [[MIDDLE_BLOCK]]: ; PGSO-NEXT: br label %[[FOR_END:.*]] -; PGSO: [[SCALAR_PH:.*]]: -; PGSO-NEXT: br label %[[FOR_BODY:.*]] -; PGSO: [[FOR_BODY]]: -; PGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; PGSO-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]] -; PGSO-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]] -; PGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 -; PGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; PGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 -; PGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; PGSO: [[FOR_END]]: ; PGSO-NEXT: ret void ; @@ -782,16 +707,6 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br label %[[FOR_END:.*]] -; NPGSO: [[SCALAR_PH:.*]]: -; NPGSO-NEXT: br label %[[FOR_BODY:.*]] -; NPGSO: [[FOR_BODY]]: -; NPGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; NPGSO-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]] -; NPGSO-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]] -; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 -; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 -; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; NPGSO: [[FOR_END]]: ; NPGSO-NEXT: ret void ; @@ -830,7 +745,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; CHECK-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -843,7 +758,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; CHECK-NEXT: store i16 42, ptr [[GEPOFB]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; @@ -862,7 +777,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; PGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4 ; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; PGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; PGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; PGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; PGSO: [[MIDDLE_BLOCK]]: ; PGSO-NEXT: br label %[[SCALAR_PH]] ; PGSO: [[SCALAR_PH]]: @@ -875,7 +790,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; PGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 ; PGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; PGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 -; PGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; PGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; PGSO: [[FOR_END]]: ; PGSO-NEXT: ret void ; @@ -894,7 +809,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; NPGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4 ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br label %[[SCALAR_PH]] ; NPGSO: [[SCALAR_PH]]: @@ -907,7 +822,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 ; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 -; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; NPGSO: [[FOR_END]]: ; NPGSO-NEXT: ret void ; @@ -1092,10 +1007,8 @@ exit: ; CHECK: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META16]], [[META17]]} ; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]} -; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META21:![0-9]+]]} -; CHECK: [[META21]] = !{!"llvm.loop.vectorize.enable", i1 true} -; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]} -; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]]} +; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]} +; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]} ;. ; PGSO: [[PROF14]] = !{!"function_entry_count", i64 0} ; PGSO: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]} @@ -1103,10 +1016,8 @@ exit: ; PGSO: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"} ; PGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META16]], [[META17]]} ; PGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]} -; PGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META21:![0-9]+]]} -; PGSO: [[META21]] = !{!"llvm.loop.vectorize.enable", i1 true} -; PGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]} -; PGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]]} +; PGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]} +; PGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]} ;. ; NPGSO: [[PROF14]] = !{!"function_entry_count", i64 0} ; NPGSO: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]} @@ -1119,8 +1030,6 @@ exit: ; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]} ; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META16]]} ; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]], [[META17]]} -; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META26:![0-9]+]]} -; NPGSO: [[META26]] = !{!"llvm.loop.vectorize.enable", i1 true} -; NPGSO: [[LOOP27]] = distinct !{[[LOOP27]], [[META16]], [[META17]]} -; NPGSO: [[LOOP28]] = distinct !{[[LOOP28]], [[META16]]} +; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]], [[META17]]} +; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META16]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/phi-cost.ll b/llvm/test/Transforms/LoopVectorize/phi-cost.ll index bf5631c..7b5d0b6 100644 --- a/llvm/test/Transforms/LoopVectorize/phi-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/phi-cost.ll @@ -185,13 +185,9 @@ define i32 @red_phi_0(i32 %start, ptr %src) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP1]], label %[[SCALAR_PH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[TMP1]], label %[[SCALAR_PH1:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[SCALAR_PH1]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH1:.*:]] -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: br i1 poison, label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[START]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP0]]) diff --git a/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll b/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll index a2563256..f2d6834 100644 --- a/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll +++ b/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll @@ -29,22 +29,6 @@ define void @pr154045(ptr %p, i1 %c, i64 %x) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] -; CHECK-NEXT: br i1 [[C]], label %[[LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: [[REM:%.*]] = srem i64 0, [[X]] -; CHECK-NEXT: br label %[[LATCH]] -; CHECK: [[LATCH]]: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[REM]], %[[ELSE]] ], [ 0, %[[LOOP]] ] -; CHECK-NEXT: [[PHI_TRUNC:%.*]] = trunc i64 [[PHI]] to i32 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[PHI_TRUNC]], 0 -; CHECK-NEXT: store i32 [[SHL]], ptr [[P]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/pr32859.ll b/llvm/test/Transforms/LoopVectorize/pr32859.ll index a29a6bd..2d30e0c 100644 --- a/llvm/test/Transforms/LoopVectorize/pr32859.ll +++ b/llvm/test/Transforms/LoopVectorize/pr32859.ll @@ -10,7 +10,7 @@ ; CHECK: %e.0.ph = phi i32 [ 0, %if.end.2.i ], [ 0, %middle.block ] ; Function Attrs: nounwind uwtable -define void @main() #0 { +define void @main(i32 %n) #0 { entry: br label %for.cond1.preheader.i @@ -21,7 +21,7 @@ for.cond1.preheader.i: ; preds = %if.end.2.i, %entry if.end.2.i: ; preds = %for.cond1.preheader.i %inc5.i = add nsw i32 %c.06.i, 1 - %cmp.i = icmp slt i32 %inc5.i, 16 + %cmp.i = icmp slt i32 %inc5.i, %n br i1 %cmp.i, label %for.cond1.preheader.i, label %for.cond.preheader for.cond.preheader: ; preds = %if.end.2.i diff --git a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll index b0e2ae6..98963a7 100644 --- a/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll +++ b/llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll @@ -20,18 +20,8 @@ define i16 @duplicate_lcssa(i16 %val) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2 ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ [[VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = sub nsw i16 [[IV]], 1 -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i16 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[LCSSA_1:%.*]] = phi i16 [ [[RES]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI1]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[LCSSA_2:%.*]] = phi i16 [ [[RES]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI1]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i16 [[LCSSA_2]] +; CHECK-NEXT: ret i16 [[VECTOR_RECUR_EXTRACT_FOR_PHI1]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll index d1b912d..a1cb361 100644 --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -43,26 +43,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_07:%.*]] = phi i16 [ 99, [[SCALAR_PH:%.*]] ], [ [[INC7:%.*]], [[FOR_LATCH:%.*]] ] -; CHECK-NEXT: [[LV:%.*]] = load i16, ptr @v_38, align 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i16 [[LV]], 32767 -; CHECK-NEXT: br i1 [[CMP1]], label [[COND_END:%.*]], label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i16 [[LV]], 0 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_LATCH]], label [[COND_FALSE4:%.*]] -; CHECK: cond.false4: -; CHECK-NEXT: [[REM:%.*]] = srem i16 5786, [[LV]] -; CHECK-NEXT: br label [[FOR_LATCH]] -; CHECK: for.latch: -; CHECK-NEXT: [[COND6:%.*]] = phi i16 [ [[REM]], [[COND_FALSE4]] ], [ 5786, [[COND_END]] ] -; CHECK-NEXT: store i16 [[COND6]], ptr @v_39, align 1 -; CHECK-NEXT: [[INC7]] = add nsw i16 [[I_07]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC7]], 111 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]] +; CHECK-NEXT: br label [[FOR_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[RV:%.*]] = load i16, ptr @v_39, align 1 ; CHECK-NEXT: ret i16 [[RV]] diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index 8450db6..9ed35fb 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -57,16 +57,7 @@ define void @pr45679(ptr %A) { ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[RIV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]] -; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -118,16 +109,7 @@ define void @pr45679(ptr %A) { ; VF2UF2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2UF2: middle.block: -; VF2UF2-NEXT: br label [[EXIT:%.*]] -; VF2UF2: scalar.ph: ; VF2UF2-NEXT: br label [[LOOP:%.*]] -; VF2UF2: loop: -; VF2UF2-NEXT: [[RIV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] -; VF2UF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]] -; VF2UF2-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 -; VF2UF2-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 -; VF2UF2-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; VF2UF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] ; VF2UF2: exit: ; VF2UF2-NEXT: ret void ; @@ -174,16 +156,7 @@ define void @pr45679(ptr %A) { ; VF1UF4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF1UF4: middle.block: -; VF1UF4-NEXT: br label [[EXIT:%.*]] -; VF1UF4: scalar.ph: ; VF1UF4-NEXT: br label [[LOOP:%.*]] -; VF1UF4: loop: -; VF1UF4-NEXT: [[RIV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] -; VF1UF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]] -; VF1UF4-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 -; VF1UF4-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 -; VF1UF4-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] ; VF1UF4: exit: ; VF1UF4-NEXT: ret void ; @@ -253,17 +226,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: store i64 [[V]], ptr [[B]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -319,17 +282,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF2UF2: middle.block: -; VF2UF2-NEXT: br label [[FOR_END:%.*]] -; VF2UF2: scalar.ph: ; VF2UF2-NEXT: br label [[FOR_BODY:%.*]] -; VF2UF2: for.body: -; VF2UF2-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; VF2UF2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; VF2UF2-NEXT: [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; VF2UF2-NEXT: store i64 [[V]], ptr [[B]], align 8 -; VF2UF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; VF2UF2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14 -; VF2UF2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; VF2UF2: for.end: ; VF2UF2-NEXT: ret void ; @@ -380,17 +333,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF1UF4: middle.block: -; VF1UF4-NEXT: br label [[FOR_END:%.*]] -; VF1UF4: scalar.ph: ; VF1UF4-NEXT: br label [[FOR_BODY:%.*]] -; VF1UF4: for.body: -; VF1UF4-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; VF1UF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; VF1UF4-NEXT: [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; VF1UF4-NEXT: store i64 [[V]], ptr [[B]], align 8 -; VF1UF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; VF1UF4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14 -; VF1UF4-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; VF1UF4: for.end: ; VF1UF4-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll index 673d582..01c6c3f 100644 --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -31,23 +31,13 @@ define void @test(i16 %x, i64 %y, ptr %ptr) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[LOOP_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: store i32 0, ptr [[PTR]], align 4 -; CHECK-NEXT: [[V2:%.*]] = trunc i64 [[IV]] to i8 -; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1 -; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]] -; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]] ; CHECK: loop.exit: ; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]] ; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1 ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: -; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[IV_NEXT_1:%.*]], [[LOOP_2]] ], [ 0, [[LOOP_EXIT]] ] +; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[IV_NEXT_1:%.*]], [[LOOP_2]] ], [ 0, [[LOOP]] ] ; CHECK-NEXT: [[IV_NEXT_1]] = add i64 [[IV_1]], [[V1]] ; CHECK-NEXT: call void @use(i64 [[IV_NEXT_1]]) ; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV_NEXT_1]], 200 diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll index 75437fe..615ea06 100644 --- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll @@ -61,24 +61,9 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize { ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP28:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP26]]) -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 41, [[SCALAR_PH:%.*]] ], [ [[IVMINUS1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i16 [ 0, [[SCALAR_PH]] ], [ [[PREVSUM:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IVMINUS1]] = add nsw i16 [[IV]], -1 -; CHECK-NEXT: [[GEPA0:%.*]] = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 [[IVMINUS1]], i16 0 -; CHECK-NEXT: [[TMP29:%.*]] = load i16, ptr [[GEPA0]], align 1 -; CHECK-NEXT: [[GEPA3:%.*]] = getelementptr inbounds [40 x [4 x i16]], ptr @A, i16 0, i16 [[IVMINUS1]], i16 3 -; CHECK-NEXT: [[TMP30:%.*]] = load i16, ptr [[GEPA3]], align 1 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i16 [[TMP29]], [[TMP30]] -; CHECK-NEXT: [[PREVSUM]] = add nsw i16 [[SUM]], [[ADD]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[IV]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[PREVSUM_LCSSA:%.*]] = phi i16 [ [[PREVSUM]], [[LOOP]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i16 [[PREVSUM_LCSSA]] +; CHECK-NEXT: ret i16 [[TMP28]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index 637b4ab..7b35009 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -33,31 +33,9 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK: middle.block: ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI7]]) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[PREDPHI5]], i32 1 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 6, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[V_2:%.*]] = phi i32 [ 35902, [[SCALAR_PH]] ], [ [[P_2:%.*]], [[LOOP_LATCH]] ] -; CHECK-NEXT: br i1 [[C_2]], label [[LOOP_LATCH]], label [[BODY_1:%.*]] -; CHECK: body.1: -; CHECK-NEXT: [[V_2_ADD:%.*]] = add i32 [[V_2]], 10 -; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[BODY_2:%.*]] -; CHECK: body.2: -; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[V_2_ADD]], 20 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[A]], 1 -; CHECK-NEXT: [[ADD_2:%.*]] = add i32 [[ADD_1]], [[XOR]] -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[P_1:%.*]] = phi i32 [ [[IV]], [[LOOP_HEADER]] ], [ 9, [[BODY_1]] ], [ 9, [[BODY_2]] ] -; CHECK-NEXT: [[P_2]] = phi i32 [ [[V_2]], [[LOOP_HEADER]] ], [ [[V_2_ADD]], [[BODY_1]] ], [ [[ADD_2]], [[BODY_2]] ] -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[IV]], 181 -; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[E_1:%.*]] = phi i32 [ [[P_1]], [[LOOP_LATCH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[E_2:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_1]], [[E_2]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP9]], [[TMP10]] ; CHECK-NEXT: ret i32 [[RES]] ; bb: diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll index d5b2519..1ef614a 100644 --- a/llvm/test/Transforms/LoopVectorize/pr66616.ll +++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll @@ -18,41 +18,32 @@ define void @pr66616(ptr %ptr) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[PREHEADER:%.*]] -; CHECK: scalar.ph: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP0]], [[VECTOR_BODY]] ] ; CHECK-NEXT: br label [[LOOP_1:%.*]] -; CHECK: loop.1: -; CHECK-NEXT: [[IV_1:%.*]] = phi i8 [ 0, [[SCALAR_PH:%.*]] ], [ [[INC:%.*]], [[LOOP_1]] ] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: [[INC]] = add i8 [[IV_1]], 1 -; CHECK-NEXT: [[COND1:%.*]] = icmp eq i8 [[INC]], 0 -; CHECK-NEXT: br i1 [[COND1]], label [[PREHEADER]], label [[LOOP_1]] ; CHECK: preheader: -; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = phi i32 [ [[ADD3]], [[LOOP_1]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, [[ADD3_LCSSA]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -1, [[DOTLCSSA]] ; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH1:%.*]], label [[VECTOR_PH2:%.*]] -; CHECK: vector.ph2: +; CHECK: vector.ph1: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[ADD3_LCSSA]], [[DOTCAST]] +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP3]], [[DOTCAST]] ; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY3:%.*]] -; CHECK: vector.body3: +; CHECK: vector.body2: ; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ 0, [[VECTOR_PH2]] ], [ [[INDEX_NEXT9:%.*]], [[VECTOR_BODY3]] ] ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX8]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK6:%.*]], label [[VECTOR_BODY3]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: middle.block6: +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK5:%.*]], label [[VECTOR_BODY3]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: middle.block5: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH1]] -; CHECK: scalar.ph1: -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK6]] ], [ [[ADD3_LCSSA]], [[PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK6]] ], [ [[PTR]], [[PREHEADER]] ] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK5]] ], [ [[TMP3]], [[LOOP_1]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK5]] ], [ [[PTR]], [[LOOP_1]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: ; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH1]] ] diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 70428f0..565e203 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -425,20 +425,6 @@ define void @switch_all_to_default(ptr %start) { ; IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IC1: [[MIDDLE_BLOCK]]: ; IC1-NEXT: br label %[[EXIT:.*]] -; IC1: [[SCALAR_PH:.*]]: -; IC1-NEXT: br label %[[LOOP_HEADER:.*]] -; IC1: [[LOOP_HEADER]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH]] [ -; IC1-NEXT: i64 120, label %[[LOOP_LATCH]] -; IC1-NEXT: i64 100, label %[[LOOP_LATCH]] -; IC1-NEXT: ] -; IC1: [[LOOP_LATCH]]: -; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[IV]] -; IC1-NEXT: store i64 42, ptr [[GEP]], align 1 -; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]] ; IC1: [[EXIT]]: ; IC1-NEXT: ret void ; @@ -459,20 +445,6 @@ define void @switch_all_to_default(ptr %start) { ; IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IC2: [[MIDDLE_BLOCK]]: ; IC2-NEXT: br label %[[EXIT:.*]] -; IC2: [[SCALAR_PH:.*]]: -; IC2-NEXT: br label %[[LOOP_HEADER:.*]] -; IC2: [[LOOP_HEADER]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH]] [ -; IC2-NEXT: i64 120, label %[[LOOP_LATCH]] -; IC2-NEXT: i64 100, label %[[LOOP_LATCH]] -; IC2-NEXT: ] -; IC2: [[LOOP_LATCH]]: -; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[IV]] -; IC2-NEXT: store i64 42, ptr [[GEP]], align 1 -; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]] ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; @@ -513,21 +485,6 @@ define void @switch_unconditional(ptr %start) { ; IC1-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IC1: [[MIDDLE_BLOCK]]: ; IC1-NEXT: br label %[[EXIT:.*]] -; IC1: [[SCALAR_PH:.*]]: -; IC1-NEXT: br label %[[LOOP:.*]] -; IC1: [[LOOP]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] -; IC1-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[START]], i64 [[IV]] -; IC1-NEXT: [[X:%.*]] = load i32, ptr [[GEP]], align 4 -; IC1-NEXT: switch i32 [[X]], label %[[FOO:.*]] [ -; IC1-NEXT: ] -; IC1: [[FOO]]: -; IC1-NEXT: br label %[[LATCH]] -; IC1: [[LATCH]]: -; IC1-NEXT: store i32 0, ptr [[GEP]], align 4 -; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]] ; IC1: [[EXIT]]: ; IC1-NEXT: ret void ; @@ -548,21 +505,6 @@ define void @switch_unconditional(ptr %start) { ; IC2-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IC2: [[MIDDLE_BLOCK]]: ; IC2-NEXT: br label %[[EXIT:.*]] -; IC2: [[SCALAR_PH:.*]]: -; IC2-NEXT: br label %[[LOOP:.*]] -; IC2: [[LOOP]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] -; IC2-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[START]], i64 [[IV]] -; IC2-NEXT: [[X:%.*]] = load i32, ptr [[GEP]], align 4 -; IC2-NEXT: switch i32 [[X]], label %[[FOO:.*]] [ -; IC2-NEXT: ] -; IC2: [[FOO]]: -; IC2-NEXT: br label %[[LATCH]] -; IC2: [[LATCH]]: -; IC2-NEXT: store i32 0, ptr [[GEP]], align 4 -; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]] ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll b/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll index dfdaaf1..52555d5 100644 --- a/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll @@ -58,26 +58,6 @@ define void @loop_invariant_store(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[ADD]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2 -; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48 -; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SHL]], 52 -; CHECK-NEXT: [[TRUNC_I32:%.*]] = trunc i64 [[ASHR]] to i32 -; CHECK-NEXT: br i1 [[CMP_SLT]], label %[[COND_FALSE:.*]], label %[[LOOP_LATCH]] -; CHECK: [[COND_FALSE]]: -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], %[[LOOP_HEADER]] ], [ [[ZEXT]], %[[COND_FALSE]] ] -; CHECK-NEXT: [[SHL_I32:%.*]] = shl i32 [[COND]], 8 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8 -; CHECK-NEXT: store i8 [[TRUNC]], ptr [[P]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 8 -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -174,28 +154,6 @@ define void @loop_invariant_srem(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i8 [[IV]], 2 -; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48 -; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 [[SHL]], 52 -; CHECK-NEXT: [[TRUNC_I32:%.*]] = trunc i64 [[ASHR]] to i32 -; CHECK-NEXT: br i1 [[CMP_SLT]], label %[[COND_FALSE:.*]], label %[[LOOP_LATCH]] -; CHECK: [[COND_FALSE]]: -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], %[[LOOP_HEADER]] ], [ [[ZEXT]], %[[COND_FALSE]] ] -; CHECK-NEXT: [[SHL_I32:%.*]] = shl i32 [[COND]], 8 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8 -; CHECK-NEXT: [[REM:%.*]] = srem i8 [[IV]], [[TRUNC]] -; CHECK-NEXT: [[GEP_P_REM:%.*]] = getelementptr i32, ptr [[P]], i8 [[REM]] -; CHECK-NEXT: store i32 4, ptr [[GEP_P_REM]], align 4 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV]], 8 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -245,19 +203,6 @@ define void @loop_invariant_float_store(ptr %p, i32 %a) { ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2 -; CHECK-NEXT: br i1 [[CMP_SLT]], label %[[COND_FALSE:.*]], label %[[LOOP_LATCH]] -; CHECK: [[COND_FALSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: store float [[TMP10]], ptr [[P]], align 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp slt i32 [[IV]], 8 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -315,19 +260,6 @@ define void @test_store_to_invariant_address_needs_mask_due_to_low_trip_count(pt ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 true, label %[[LOOP_LATCH]], label %[[ELSE:.*]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 1, %[[LOOP_HEADER]] ], [ 0, %[[ELSE]] ] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[DST]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 3 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll index 14526af..6542c42 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll @@ -27,17 +27,6 @@ define void @_Z3fooPf(ptr %a) { ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[P:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = fmul float [[P]], 2.000000e+00 -; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; @@ -58,25 +47,8 @@ define void @_Z3fooPf(ptr %a) { ; DEBUGLOC-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG24]], !llvm.loop [[LOOP25:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: ; DEBUGLOC-NEXT: br label %[[FOR_END:.*]], !dbg [[DBG24]] -; DEBUGLOC: [[SCALAR_PH:.*]]: -; DEBUGLOC-NEXT: br label %[[FOR_BODY:.*]], !dbg [[DBG18]] -; DEBUGLOC: [[FOR_BODY]]: -; DEBUGLOC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], !dbg [[DBG19]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[INDVARS_IV]], [[META9:![0-9]+]], !DIExpression(), [[DBG19]]) -; DEBUGLOC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]], !dbg [[DBG20]] -; DEBUGLOC-NEXT: #dbg_value(ptr [[ARRAYIDX]], [[META11:![0-9]+]], !DIExpression(), [[DBG20]]) -; DEBUGLOC-NEXT: [[P:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG21]] -; DEBUGLOC-NEXT: #dbg_value(float [[P]], [[META12:![0-9]+]], !DIExpression(), [[DBG21]]) -; DEBUGLOC-NEXT: [[MUL:%.*]] = fmul float [[P]], 2.000000e+00, !dbg [[DBG22]] -; DEBUGLOC-NEXT: #dbg_value(float [[MUL]], [[META14:![0-9]+]], !DIExpression(), [[DBG22]]) -; DEBUGLOC-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4, !dbg [[DBG23]] -; DEBUGLOC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1, !dbg [[DBG28:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[INDVARS_IV_NEXT]], [[META15:![0-9]+]], !DIExpression(), [[DBG28]]) -; DEBUGLOC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024, !dbg [[DBG29:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(i1 [[EXITCOND]], [[META16:![0-9]+]], !DIExpression(), [[DBG29]]) -; DEBUGLOC-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !dbg [[DBG24]], !llvm.loop [[LOOP30:![0-9]+]] ; DEBUGLOC: [[FOR_END]]: -; DEBUGLOC-NEXT: ret void, !dbg [[DBG32:![0-9]+]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG28:![0-9]+]] ; entry: br label %for.body @@ -122,7 +94,7 @@ define void @widen_ptr_induction_dbg(ptr %start, ptr %end) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -134,54 +106,54 @@ define void @widen_ptr_induction_dbg(ptr %start, ptr %end) { ; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds ptr, ptr [[IV]], i64 1 ; CHECK-NEXT: store ptr [[IV]], ptr [[IV]], align 1 ; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: define void @widen_ptr_induction_dbg( -; DEBUGLOC-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) !dbg [[DBG33:![0-9]+]] { +; DEBUGLOC-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) !dbg [[DBG29:![0-9]+]] { ; DEBUGLOC-NEXT: [[ENTRY:.*]]: -; DEBUGLOC-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64, !dbg [[DBG38:![0-9]+]] -; DEBUGLOC-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64, !dbg [[DBG38]] -; DEBUGLOC-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8, !dbg [[DBG38]] -; DEBUGLOC-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]], !dbg [[DBG38]] -; DEBUGLOC-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3, !dbg [[DBG38]] -; DEBUGLOC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1, !dbg [[DBG38]] -; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4, !dbg [[DBG38]] -; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !dbg [[DBG38]] +; DEBUGLOC-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64, !dbg [[DBG34:![0-9]+]] +; DEBUGLOC-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64, !dbg [[DBG34]] +; DEBUGLOC-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8, !dbg [[DBG34]] +; DEBUGLOC-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]], !dbg [[DBG34]] +; DEBUGLOC-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3, !dbg [[DBG34]] +; DEBUGLOC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1, !dbg [[DBG34]] +; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4, !dbg [[DBG34]] +; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !dbg [[DBG34]] ; DEBUGLOC: [[VECTOR_PH]]: ; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; DEBUGLOC-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8 ; DEBUGLOC-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] -; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG38]] +; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG34]] ; DEBUGLOC: [[VECTOR_BODY]]: ; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; DEBUGLOC-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG39:![0-9]+]] -; DEBUGLOC-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 8, i64 16, i64 24>, !dbg [[DBG39]] -; DEBUGLOC-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0, !dbg [[DBG40:![0-9]+]] -; DEBUGLOC-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 1, !dbg [[DBG40]] +; DEBUGLOC-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG35:![0-9]+]] +; DEBUGLOC-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 8, i64 16, i64 24>, !dbg [[DBG35]] +; DEBUGLOC-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0, !dbg [[DBG36:![0-9]+]] +; DEBUGLOC-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 1, !dbg [[DBG36]] ; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; DEBUGLOC-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32, !dbg [[DBG39]] -; DEBUGLOC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG41:![0-9]+]] -; DEBUGLOC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG41]], !llvm.loop [[LOOP42:![0-9]+]] +; DEBUGLOC-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32, !dbg [[DBG35]] +; DEBUGLOC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG37:![0-9]+]] +; DEBUGLOC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG37]], !llvm.loop [[LOOP38:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: -; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]], !dbg [[DBG41]] -; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG41]] +; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]], !dbg [[DBG37]] +; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG37]] ; DEBUGLOC: [[SCALAR_PH]]: -; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ], !dbg [[DBG39]] -; DEBUGLOC-NEXT: br label %[[LOOP:.*]], !dbg [[DBG38]] +; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ], !dbg [[DBG35]] +; DEBUGLOC-NEXT: br label %[[LOOP:.*]], !dbg [[DBG34]] ; DEBUGLOC: [[LOOP]]: -; DEBUGLOC-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ], !dbg [[DBG39]] -; DEBUGLOC-NEXT: #dbg_value(ptr [[IV]], [[META35:![0-9]+]], !DIExpression(), [[DBG39]]) -; DEBUGLOC-NEXT: [[IV_NEXT]] = getelementptr inbounds ptr, ptr [[IV]], i64 1, !dbg [[DBG43:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(ptr [[IV_NEXT]], [[META36:![0-9]+]], !DIExpression(), [[DBG43]]) -; DEBUGLOC-NEXT: store ptr [[IV]], ptr [[IV]], align 1, !dbg [[DBG40]] -; DEBUGLOC-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]], !dbg [[DBG44:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(i1 [[CMP_NOT]], [[META37:![0-9]+]], !DIExpression(), [[DBG44]]) -; DEBUGLOC-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG41]], !llvm.loop [[LOOP45:![0-9]+]] +; DEBUGLOC-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ], !dbg [[DBG35]] +; DEBUGLOC-NEXT: #dbg_value(ptr [[IV]], [[META31:![0-9]+]], !DIExpression(), [[DBG35]]) +; DEBUGLOC-NEXT: [[IV_NEXT]] = getelementptr inbounds ptr, ptr [[IV]], i64 1, !dbg [[DBG39:![0-9]+]] +; DEBUGLOC-NEXT: #dbg_value(ptr [[IV_NEXT]], [[META32:![0-9]+]], !DIExpression(), [[DBG39]]) +; DEBUGLOC-NEXT: store ptr [[IV]], ptr [[IV]], align 1, !dbg [[DBG36]] +; DEBUGLOC-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[IV_NEXT]], [[END]], !dbg [[DBG40:![0-9]+]] +; DEBUGLOC-NEXT: #dbg_value(i1 [[CMP_NOT]], [[META33:![0-9]+]], !DIExpression(), [[DBG40]]) +; DEBUGLOC-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG37]], !llvm.loop [[LOOP41:![0-9]+]] ; DEBUGLOC: [[EXIT]]: -; DEBUGLOC-NEXT: ret void, !dbg [[DBG46:![0-9]+]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG42:![0-9]+]] ; entry: br label %loop @@ -254,7 +226,7 @@ define void @predicated_phi_dbg(i64 %n, ptr %x) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] @@ -274,96 +246,96 @@ define void @predicated_phi_dbg(i64 %n, ptr %x) { ; CHECK-NEXT: store i64 [[D]], ptr [[IDX]], align 8 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: define void @predicated_phi_dbg( -; DEBUGLOC-SAME: i64 [[N:%.*]], ptr [[X:%.*]]) !dbg [[DBG47:![0-9]+]] { +; DEBUGLOC-SAME: i64 [[N:%.*]], ptr [[X:%.*]]) !dbg [[DBG43:![0-9]+]] { ; DEBUGLOC-NEXT: [[ENTRY:.*]]: -; DEBUGLOC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1), !dbg [[DBG56:![0-9]+]] -; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4, !dbg [[DBG56]] -; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !dbg [[DBG56]] +; DEBUGLOC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1), !dbg [[DBG52:![0-9]+]] +; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4, !dbg [[DBG52]] +; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]], !dbg [[DBG52]] ; DEBUGLOC: [[VECTOR_PH]]: ; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4 ; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] -; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG56]] +; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG52]] ; DEBUGLOC: [[VECTOR_BODY]]: -; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE6:.*]] ], !dbg [[DBG57:![0-9]+]] -; DEBUGLOC-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_UDIV_CONTINUE6]] ], !dbg [[DBG57]] -; DEBUGLOC-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 5), !dbg [[DBG58:![0-9]+]] -; DEBUGLOC-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0, !dbg [[DBG58]] -; DEBUGLOC-NEXT: br i1 [[TMP1]], label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]], !dbg [[DBG58]] +; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UDIV_CONTINUE6:.*]] ], !dbg [[DBG53:![0-9]+]] +; DEBUGLOC-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_UDIV_CONTINUE6]] ], !dbg [[DBG53]] +; DEBUGLOC-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 5), !dbg [[DBG54:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0, !dbg [[DBG54]] +; DEBUGLOC-NEXT: br i1 [[TMP1]], label %[[PRED_UDIV_IF:.*]], label %[[PRED_UDIV_CONTINUE:.*]], !dbg [[DBG54]] ; DEBUGLOC: [[PRED_UDIV_IF]]: -; DEBUGLOC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0, !dbg [[DBG57]] -; DEBUGLOC-NEXT: [[TMP3:%.*]] = udiv i64 [[N]], [[TMP2]], !dbg [[DBG59:![0-9]+]] -; DEBUGLOC-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i32 0, !dbg [[DBG59]] -; DEBUGLOC-NEXT: br label %[[PRED_UDIV_CONTINUE]], !dbg [[DBG58]] +; DEBUGLOC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0, !dbg [[DBG53]] +; DEBUGLOC-NEXT: [[TMP3:%.*]] = udiv i64 [[N]], [[TMP2]], !dbg [[DBG55:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i32 0, !dbg [[DBG55]] +; DEBUGLOC-NEXT: br label %[[PRED_UDIV_CONTINUE]], !dbg [[DBG54]] ; DEBUGLOC: [[PRED_UDIV_CONTINUE]]: -; DEBUGLOC-NEXT: [[TMP5:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_BODY]] ], [ [[TMP4]], %[[PRED_UDIV_IF]] ], !dbg [[DBG59]] -; DEBUGLOC-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1, !dbg [[DBG58]] -; DEBUGLOC-NEXT: br i1 [[TMP6]], label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]], !dbg [[DBG58]] +; DEBUGLOC-NEXT: [[TMP5:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_BODY]] ], [ [[TMP4]], %[[PRED_UDIV_IF]] ], !dbg [[DBG55]] +; DEBUGLOC-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1, !dbg [[DBG54]] +; DEBUGLOC-NEXT: br i1 [[TMP6]], label %[[PRED_UDIV_IF1:.*]], label %[[PRED_UDIV_CONTINUE2:.*]], !dbg [[DBG54]] ; DEBUGLOC: [[PRED_UDIV_IF1]]: -; DEBUGLOC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1, !dbg [[DBG57]] -; DEBUGLOC-NEXT: [[TMP8:%.*]] = udiv i64 [[N]], [[TMP7]], !dbg [[DBG59]] -; DEBUGLOC-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[TMP8]], i32 1, !dbg [[DBG59]] -; DEBUGLOC-NEXT: br label %[[PRED_UDIV_CONTINUE2]], !dbg [[DBG58]] +; DEBUGLOC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1, !dbg [[DBG53]] +; DEBUGLOC-NEXT: [[TMP8:%.*]] = udiv i64 [[N]], [[TMP7]], !dbg [[DBG55]] +; DEBUGLOC-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[TMP8]], i32 1, !dbg [[DBG55]] +; DEBUGLOC-NEXT: br label %[[PRED_UDIV_CONTINUE2]], !dbg [[DBG54]] ; DEBUGLOC: [[PRED_UDIV_CONTINUE2]]: -; DEBUGLOC-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP5]], %[[PRED_UDIV_CONTINUE]] ], [ [[TMP9]], %[[PRED_UDIV_IF1]] ], !dbg [[DBG59]] -; DEBUGLOC-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2, !dbg [[DBG58]] -; DEBUGLOC-NEXT: br i1 [[TMP11]], label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]], !dbg [[DBG58]] +; DEBUGLOC-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP5]], %[[PRED_UDIV_CONTINUE]] ], [ [[TMP9]], %[[PRED_UDIV_IF1]] ], !dbg [[DBG55]] +; DEBUGLOC-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2, !dbg [[DBG54]] +; DEBUGLOC-NEXT: br i1 [[TMP11]], label %[[PRED_UDIV_IF3:.*]], label %[[PRED_UDIV_CONTINUE4:.*]], !dbg [[DBG54]] ; DEBUGLOC: [[PRED_UDIV_IF3]]: -; DEBUGLOC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2, !dbg [[DBG57]] -; DEBUGLOC-NEXT: [[TMP13:%.*]] = udiv i64 [[N]], [[TMP12]], !dbg [[DBG59]] -; DEBUGLOC-NEXT: [[TMP14:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP13]], i32 2, !dbg [[DBG59]] -; DEBUGLOC-NEXT: br label %[[PRED_UDIV_CONTINUE4]], !dbg [[DBG58]] +; DEBUGLOC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2, !dbg [[DBG53]] +; DEBUGLOC-NEXT: [[TMP13:%.*]] = udiv i64 [[N]], [[TMP12]], !dbg [[DBG55]] +; DEBUGLOC-NEXT: [[TMP14:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP13]], i32 2, !dbg [[DBG55]] +; DEBUGLOC-NEXT: br label %[[PRED_UDIV_CONTINUE4]], !dbg [[DBG54]] ; DEBUGLOC: [[PRED_UDIV_CONTINUE4]]: -; DEBUGLOC-NEXT: [[TMP15:%.*]] = phi <4 x i64> [ [[TMP10]], %[[PRED_UDIV_CONTINUE2]] ], [ [[TMP14]], %[[PRED_UDIV_IF3]] ], !dbg [[DBG59]] -; DEBUGLOC-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3, !dbg [[DBG58]] -; DEBUGLOC-NEXT: br i1 [[TMP16]], label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6]], !dbg [[DBG58]] +; DEBUGLOC-NEXT: [[TMP15:%.*]] = phi <4 x i64> [ [[TMP10]], %[[PRED_UDIV_CONTINUE2]] ], [ [[TMP14]], %[[PRED_UDIV_IF3]] ], !dbg [[DBG55]] +; DEBUGLOC-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3, !dbg [[DBG54]] +; DEBUGLOC-NEXT: br i1 [[TMP16]], label %[[PRED_UDIV_IF5:.*]], label %[[PRED_UDIV_CONTINUE6]], !dbg [[DBG54]] ; DEBUGLOC: [[PRED_UDIV_IF5]]: -; DEBUGLOC-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 3, !dbg [[DBG57]] -; DEBUGLOC-NEXT: [[TMP18:%.*]] = udiv i64 [[N]], [[TMP17]], !dbg [[DBG59]] -; DEBUGLOC-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP18]], i32 3, !dbg [[DBG59]] -; DEBUGLOC-NEXT: br label %[[PRED_UDIV_CONTINUE6]], !dbg [[DBG58]] +; DEBUGLOC-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 3, !dbg [[DBG53]] +; DEBUGLOC-NEXT: [[TMP18:%.*]] = udiv i64 [[N]], [[TMP17]], !dbg [[DBG55]] +; DEBUGLOC-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP18]], i32 3, !dbg [[DBG55]] +; DEBUGLOC-NEXT: br label %[[PRED_UDIV_CONTINUE6]], !dbg [[DBG54]] ; DEBUGLOC: [[PRED_UDIV_CONTINUE6]]: -; DEBUGLOC-NEXT: [[TMP20:%.*]] = phi <4 x i64> [ [[TMP15]], %[[PRED_UDIV_CONTINUE4]] ], [ [[TMP19]], %[[PRED_UDIV_IF5]] ], !dbg [[DBG59]] -; DEBUGLOC-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP20]], <4 x i64> zeroinitializer, !dbg [[DBG60:![0-9]+]] -; DEBUGLOC-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[X]], i64 [[INDEX]], !dbg [[DBG61:![0-9]+]] -; DEBUGLOC-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP21]], align 8, !dbg [[DBG62:![0-9]+]] -; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG57]] -; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4), !dbg [[DBG57]] -; DEBUGLOC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG63:![0-9]+]] -; DEBUGLOC-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG63]], !llvm.loop [[LOOP64:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP20:%.*]] = phi <4 x i64> [ [[TMP15]], %[[PRED_UDIV_CONTINUE4]] ], [ [[TMP19]], %[[PRED_UDIV_IF5]] ], !dbg [[DBG55]] +; DEBUGLOC-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP20]], <4 x i64> zeroinitializer, !dbg [[DBG56:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[X]], i64 [[INDEX]], !dbg [[DBG57:![0-9]+]] +; DEBUGLOC-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP21]], align 8, !dbg [[DBG58:![0-9]+]] +; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG53]] +; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4), !dbg [[DBG53]] +; DEBUGLOC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG59:![0-9]+]] +; DEBUGLOC-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG59]], !llvm.loop [[LOOP60:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: -; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]], !dbg [[DBG63]] -; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]], !dbg [[DBG63]] +; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]], !dbg [[DBG59]] +; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]], !dbg [[DBG59]] ; DEBUGLOC: [[SCALAR_PH]]: -; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], !dbg [[DBG57]] -; DEBUGLOC-NEXT: br label %[[FOR_BODY:.*]], !dbg [[DBG56]] +; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], !dbg [[DBG53]] +; DEBUGLOC-NEXT: br label %[[FOR_BODY:.*]], !dbg [[DBG52]] ; DEBUGLOC: [[FOR_BODY]]: -; DEBUGLOC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[FOR_INC:.*]] ], !dbg [[DBG57]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[I]], [[META49:![0-9]+]], !DIExpression(), [[DBG57]]) -; DEBUGLOC-NEXT: [[CMP:%.*]] = icmp ult i64 [[I]], 5, !dbg [[DBG58]] -; DEBUGLOC-NEXT: #dbg_value(i1 [[CMP]], [[META50:![0-9]+]], !DIExpression(), [[DBG58]]) -; DEBUGLOC-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[FOR_INC]], !dbg [[DBG65:![0-9]+]] +; DEBUGLOC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[FOR_INC:.*]] ], !dbg [[DBG53]] +; DEBUGLOC-NEXT: #dbg_value(i64 [[I]], [[META45:![0-9]+]], !DIExpression(), [[DBG53]]) +; DEBUGLOC-NEXT: [[CMP:%.*]] = icmp ult i64 [[I]], 5, !dbg [[DBG54]] +; DEBUGLOC-NEXT: #dbg_value(i1 [[CMP]], [[META46:![0-9]+]], !DIExpression(), [[DBG54]]) +; DEBUGLOC-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[FOR_INC]], !dbg [[DBG61:![0-9]+]] ; DEBUGLOC: [[IF_THEN]]: -; DEBUGLOC-NEXT: [[TMP4:%.*]] = udiv i64 [[N]], [[I]], !dbg [[DBG59]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[TMP4]], [[META51:![0-9]+]], !DIExpression(), [[DBG59]]) -; DEBUGLOC-NEXT: br label %[[FOR_INC]], !dbg [[DBG66:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP4:%.*]] = udiv i64 [[N]], [[I]], !dbg [[DBG55]] +; DEBUGLOC-NEXT: #dbg_value(i64 [[TMP4]], [[META47:![0-9]+]], !DIExpression(), [[DBG55]]) +; DEBUGLOC-NEXT: br label %[[FOR_INC]], !dbg [[DBG62:![0-9]+]] ; DEBUGLOC: [[FOR_INC]]: -; DEBUGLOC-NEXT: [[D:%.*]] = phi i64 [ 0, %[[FOR_BODY]] ], [ [[TMP4]], %[[IF_THEN]] ], !dbg [[DBG60]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[D]], [[META52:![0-9]+]], !DIExpression(), [[DBG60]]) -; DEBUGLOC-NEXT: [[IDX:%.*]] = getelementptr i64, ptr [[X]], i64 [[I]], !dbg [[DBG61]] -; DEBUGLOC-NEXT: #dbg_value(ptr [[IDX]], [[META53:![0-9]+]], !DIExpression(), [[DBG61]]) -; DEBUGLOC-NEXT: store i64 [[D]], ptr [[IDX]], align 8, !dbg [[DBG62]] -; DEBUGLOC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1, !dbg [[DBG67:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[I_NEXT]], [[META54:![0-9]+]], !DIExpression(), [[DBG67]]) -; DEBUGLOC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]], !dbg [[DBG68:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(i1 [[COND]], [[META55:![0-9]+]], !DIExpression(), [[DBG68]]) -; DEBUGLOC-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END]], !dbg [[DBG63]], !llvm.loop [[LOOP69:![0-9]+]] +; DEBUGLOC-NEXT: [[D:%.*]] = phi i64 [ 0, %[[FOR_BODY]] ], [ [[TMP4]], %[[IF_THEN]] ], !dbg [[DBG56]] +; DEBUGLOC-NEXT: #dbg_value(i64 [[D]], [[META48:![0-9]+]], !DIExpression(), [[DBG56]]) +; DEBUGLOC-NEXT: [[IDX:%.*]] = getelementptr i64, ptr [[X]], i64 [[I]], !dbg [[DBG57]] +; DEBUGLOC-NEXT: #dbg_value(ptr [[IDX]], [[META49:![0-9]+]], !DIExpression(), [[DBG57]]) +; DEBUGLOC-NEXT: store i64 [[D]], ptr [[IDX]], align 8, !dbg [[DBG58]] +; DEBUGLOC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1, !dbg [[DBG63:![0-9]+]] +; DEBUGLOC-NEXT: #dbg_value(i64 [[I_NEXT]], [[META50:![0-9]+]], !DIExpression(), [[DBG63]]) +; DEBUGLOC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]], !dbg [[DBG64:![0-9]+]] +; DEBUGLOC-NEXT: #dbg_value(i1 [[COND]], [[META51:![0-9]+]], !DIExpression(), [[DBG64]]) +; DEBUGLOC-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END]], !dbg [[DBG59]], !llvm.loop [[LOOP65:![0-9]+]] ; DEBUGLOC: [[FOR_END]]: -; DEBUGLOC-NEXT: ret void, !dbg [[DBG70:![0-9]+]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG66:![0-9]+]] ; entry: br label %for.body @@ -415,7 +387,7 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -429,57 +401,57 @@ define void @scalar_cast_dbg(ptr nocapture %a, i32 %start, i64 %k) { ; CHECK-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[K]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: define void @scalar_cast_dbg( -; DEBUGLOC-SAME: ptr captures(none) [[A:%.*]], i32 [[START:%.*]], i64 [[K:%.*]]) !dbg [[DBG71:![0-9]+]] { +; DEBUGLOC-SAME: ptr captures(none) [[A:%.*]], i32 [[START:%.*]], i64 [[K:%.*]]) !dbg [[DBG67:![0-9]+]] { ; DEBUGLOC-NEXT: [[ENTRY:.*]]: -; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K]], 4, !dbg [[DBG78:![0-9]+]] -; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !dbg [[DBG78]] +; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K]], 4, !dbg [[DBG74:![0-9]+]] +; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !dbg [[DBG74]] ; DEBUGLOC: [[VECTOR_SCEVCHECK]]: -; DEBUGLOC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1, !dbg [[DBG78]] -; DEBUGLOC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32, !dbg [[DBG78]] -; DEBUGLOC-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0, !dbg [[DBG78]] -; DEBUGLOC-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295, !dbg [[DBG78]] -; DEBUGLOC-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]], !dbg [[DBG78]] -; DEBUGLOC-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]], !dbg [[DBG79:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1, !dbg [[DBG74]] +; DEBUGLOC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32, !dbg [[DBG74]] +; DEBUGLOC-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0, !dbg [[DBG74]] +; DEBUGLOC-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP0]], 4294967295, !dbg [[DBG74]] +; DEBUGLOC-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]], !dbg [[DBG74]] +; DEBUGLOC-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]], !dbg [[DBG75:![0-9]+]] ; DEBUGLOC: [[VECTOR_PH]]: ; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4 ; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]] -; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG79]] +; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG75]] ; DEBUGLOC: [[VECTOR_BODY]]: -; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG79]] -; DEBUGLOC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG80:![0-9]+]] -; DEBUGLOC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32, !dbg [[DBG80]] -; DEBUGLOC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]], !dbg [[DBG81:![0-9]+]] -; DEBUGLOC-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP6]], align 4, !dbg [[DBG82:![0-9]+]] -; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG79]] -; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4), !dbg [[DBG80]] -; DEBUGLOC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG83:![0-9]+]] -; DEBUGLOC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG83]], !llvm.loop [[LOOP84:![0-9]+]] +; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG75]] +; DEBUGLOC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG76:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32, !dbg [[DBG76]] +; DEBUGLOC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]], !dbg [[DBG77:![0-9]+]] +; DEBUGLOC-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP6]], align 4, !dbg [[DBG78:![0-9]+]] +; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG75]] +; DEBUGLOC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4), !dbg [[DBG76]] +; DEBUGLOC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG79:![0-9]+]] +; DEBUGLOC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG79]], !llvm.loop [[LOOP80:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: -; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]], !dbg [[DBG83]] -; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG83]] +; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]], !dbg [[DBG79]] +; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG79]] ; DEBUGLOC: [[SCALAR_PH]]: -; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], !dbg [[DBG79]] -; DEBUGLOC-NEXT: br label %[[LOOP:.*]], !dbg [[DBG78]] +; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], !dbg [[DBG75]] +; DEBUGLOC-NEXT: br label %[[LOOP:.*]], !dbg [[DBG74]] ; DEBUGLOC: [[LOOP]]: -; DEBUGLOC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ], !dbg [[DBG79]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[IV]], [[META73:![0-9]+]], !DIExpression(), [[DBG79]]) -; DEBUGLOC-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32, !dbg [[DBG80]] -; DEBUGLOC-NEXT: #dbg_value(i32 [[TRUNC_IV]], [[META74:![0-9]+]], !DIExpression(), [[DBG80]]) -; DEBUGLOC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TRUNC_IV]], !dbg [[DBG81]] -; DEBUGLOC-NEXT: #dbg_value(ptr [[ARRAYIDX]], [[META75:![0-9]+]], !DIExpression(), [[DBG81]]) -; DEBUGLOC-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4, !dbg [[DBG82]] -; DEBUGLOC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1, !dbg [[DBG85:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[IV_NEXT]], [[META76:![0-9]+]], !DIExpression(), [[DBG85]]) -; DEBUGLOC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[K]], !dbg [[DBG86:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(i1 [[EXITCOND]], [[META77:![0-9]+]], !DIExpression(), [[DBG86]]) -; DEBUGLOC-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG83]], !llvm.loop [[LOOP87:![0-9]+]] +; DEBUGLOC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ], !dbg [[DBG75]] +; DEBUGLOC-NEXT: #dbg_value(i64 [[IV]], [[META69:![0-9]+]], !DIExpression(), [[DBG75]]) +; DEBUGLOC-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i32, !dbg [[DBG76]] +; DEBUGLOC-NEXT: #dbg_value(i32 [[TRUNC_IV]], [[META70:![0-9]+]], !DIExpression(), [[DBG76]]) +; DEBUGLOC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TRUNC_IV]], !dbg [[DBG77]] +; DEBUGLOC-NEXT: #dbg_value(ptr [[ARRAYIDX]], [[META71:![0-9]+]], !DIExpression(), [[DBG77]]) +; DEBUGLOC-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4, !dbg [[DBG78]] +; DEBUGLOC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1, !dbg [[DBG81:![0-9]+]] +; DEBUGLOC-NEXT: #dbg_value(i64 [[IV_NEXT]], [[META72:![0-9]+]], !DIExpression(), [[DBG81]]) +; DEBUGLOC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[K]], !dbg [[DBG82:![0-9]+]] +; DEBUGLOC-NEXT: #dbg_value(i1 [[EXITCOND]], [[META73:![0-9]+]], !DIExpression(), [[DBG82]]) +; DEBUGLOC-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG79]], !llvm.loop [[LOOP83:![0-9]+]] ; DEBUGLOC: [[EXIT]]: -; DEBUGLOC-NEXT: ret void, !dbg [[DBG88:![0-9]+]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG84:![0-9]+]] ; entry: br label %loop @@ -522,7 +494,7 @@ define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) { ; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -538,60 +510,60 @@ define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) { ; CHECK-NEXT: store float [[CALL]], ptr [[GEP_X]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: define void @widen_intrinsic_dbg( -; DEBUGLOC-SAME: i64 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) !dbg [[DBG89:![0-9]+]] { +; DEBUGLOC-SAME: i64 [[N:%.*]], ptr [[Y:%.*]], ptr [[X:%.*]]) !dbg [[DBG85:![0-9]+]] { ; DEBUGLOC-NEXT: [[ENTRY:.*]]: -; DEBUGLOC-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64, !dbg [[DBG98:![0-9]+]] -; DEBUGLOC-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG98]] -; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4, !dbg [[DBG98]] -; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]], !dbg [[DBG98]] +; DEBUGLOC-NEXT: [[Y2:%.*]] = ptrtoint ptr [[Y]] to i64, !dbg [[DBG94:![0-9]+]] +; DEBUGLOC-NEXT: [[X1:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG94]] +; DEBUGLOC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4, !dbg [[DBG94]] +; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]], !dbg [[DBG94]] ; DEBUGLOC: [[VECTOR_MEMCHECK]]: -; DEBUGLOC-NEXT: [[TMP0:%.*]] = sub i64 [[X1]], [[Y2]], !dbg [[DBG98]] -; DEBUGLOC-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16, !dbg [[DBG98]] -; DEBUGLOC-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]], !dbg [[DBG99:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP0:%.*]] = sub i64 [[X1]], [[Y2]], !dbg [[DBG94]] +; DEBUGLOC-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16, !dbg [[DBG94]] +; DEBUGLOC-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]], !dbg [[DBG95:![0-9]+]] ; DEBUGLOC: [[VECTOR_PH]]: ; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; DEBUGLOC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG99]] +; DEBUGLOC-NEXT: br label %[[VECTOR_BODY:.*]], !dbg [[DBG95]] ; DEBUGLOC: [[VECTOR_BODY]]: -; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG99]] -; DEBUGLOC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]], !dbg [[DBG100:![0-9]+]] -; DEBUGLOC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !dbg [[DBG101:![0-9]+]] -; DEBUGLOC-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]]), !dbg [[DBG102:![0-9]+]] -; DEBUGLOC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]], !dbg [[DBG103:![0-9]+]] -; DEBUGLOC-NEXT: store <4 x float> [[TMP2]], ptr [[TMP3]], align 4, !dbg [[DBG104:![0-9]+]] -; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG99]] -; DEBUGLOC-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG105:![0-9]+]] -; DEBUGLOC-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG105]], !llvm.loop [[LOOP106:![0-9]+]] +; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG95]] +; DEBUGLOC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDEX]], !dbg [[DBG96:![0-9]+]] +; DEBUGLOC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !dbg [[DBG97:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]]), !dbg [[DBG98:![0-9]+]] +; DEBUGLOC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDEX]], !dbg [[DBG99:![0-9]+]] +; DEBUGLOC-NEXT: store <4 x float> [[TMP2]], ptr [[TMP3]], align 4, !dbg [[DBG100:![0-9]+]] +; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG95]] +; DEBUGLOC-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG101:![0-9]+]] +; DEBUGLOC-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !dbg [[DBG101]], !llvm.loop [[LOOP102:![0-9]+]] ; DEBUGLOC: [[MIDDLE_BLOCK]]: -; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]], !dbg [[DBG105]] -; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG105]] +; DEBUGLOC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]], !dbg [[DBG101]] +; DEBUGLOC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]], !dbg [[DBG101]] ; DEBUGLOC: [[SCALAR_PH]]: -; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ], !dbg [[DBG99]] -; DEBUGLOC-NEXT: br label %[[LOOP:.*]], !dbg [[DBG98]] +; DEBUGLOC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_MEMCHECK]] ], !dbg [[DBG95]] +; DEBUGLOC-NEXT: br label %[[LOOP:.*]], !dbg [[DBG94]] ; DEBUGLOC: [[LOOP]]: -; DEBUGLOC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ], !dbg [[DBG99]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[IV]], [[META91:![0-9]+]], !DIExpression(), [[DBG99]]) -; DEBUGLOC-NEXT: [[GEP_Y:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[IV]], !dbg [[DBG100]] -; DEBUGLOC-NEXT: #dbg_value(ptr [[GEP_Y]], [[META92:![0-9]+]], !DIExpression(), [[DBG100]]) -; DEBUGLOC-NEXT: [[LOAD:%.*]] = load float, ptr [[GEP_Y]], align 4, !dbg [[DBG101]] -; DEBUGLOC-NEXT: #dbg_value(float [[LOAD]], [[META93:![0-9]+]], !DIExpression(), [[DBG101]]) -; DEBUGLOC-NEXT: [[CALL:%.*]] = call float @llvm.sqrt.f32(float [[LOAD]]), !dbg [[DBG102]] -; DEBUGLOC-NEXT: #dbg_value(float [[CALL]], [[META94:![0-9]+]], !DIExpression(), [[DBG102]]) -; DEBUGLOC-NEXT: [[GEP_X:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[IV]], !dbg [[DBG103]] -; DEBUGLOC-NEXT: #dbg_value(ptr [[GEP_X]], [[META95:![0-9]+]], !DIExpression(), [[DBG103]]) -; DEBUGLOC-NEXT: store float [[CALL]], ptr [[GEP_X]], align 4, !dbg [[DBG104]] -; DEBUGLOC-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1, !dbg [[DBG107:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(i64 [[IV_NEXT]], [[META96:![0-9]+]], !DIExpression(), [[DBG107]]) -; DEBUGLOC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]], !dbg [[DBG108:![0-9]+]] -; DEBUGLOC-NEXT: #dbg_value(i1 [[EXITCOND]], [[META97:![0-9]+]], !DIExpression(), [[DBG108]]) -; DEBUGLOC-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG105]], !llvm.loop [[LOOP109:![0-9]+]] +; DEBUGLOC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ], !dbg [[DBG95]] +; DEBUGLOC-NEXT: #dbg_value(i64 [[IV]], [[META87:![0-9]+]], !DIExpression(), [[DBG95]]) +; DEBUGLOC-NEXT: [[GEP_Y:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[IV]], !dbg [[DBG96]] +; DEBUGLOC-NEXT: #dbg_value(ptr [[GEP_Y]], [[META88:![0-9]+]], !DIExpression(), [[DBG96]]) +; DEBUGLOC-NEXT: [[LOAD:%.*]] = load float, ptr [[GEP_Y]], align 4, !dbg [[DBG97]] +; DEBUGLOC-NEXT: #dbg_value(float [[LOAD]], [[META89:![0-9]+]], !DIExpression(), [[DBG97]]) +; DEBUGLOC-NEXT: [[CALL:%.*]] = call float @llvm.sqrt.f32(float [[LOAD]]), !dbg [[DBG98]] +; DEBUGLOC-NEXT: #dbg_value(float [[CALL]], [[META90:![0-9]+]], !DIExpression(), [[DBG98]]) +; DEBUGLOC-NEXT: [[GEP_X:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[IV]], !dbg [[DBG99]] +; DEBUGLOC-NEXT: #dbg_value(ptr [[GEP_X]], [[META91:![0-9]+]], !DIExpression(), [[DBG99]]) +; DEBUGLOC-NEXT: store float [[CALL]], ptr [[GEP_X]], align 4, !dbg [[DBG100]] +; DEBUGLOC-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1, !dbg [[DBG103:![0-9]+]] +; DEBUGLOC-NEXT: #dbg_value(i64 [[IV_NEXT]], [[META92:![0-9]+]], !DIExpression(), [[DBG103]]) +; DEBUGLOC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]], !dbg [[DBG104:![0-9]+]] +; DEBUGLOC-NEXT: #dbg_value(i1 [[EXITCOND]], [[META93:![0-9]+]], !DIExpression(), [[DBG104]]) +; DEBUGLOC-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !dbg [[DBG101]], !llvm.loop [[LOOP105:![0-9]+]] ; DEBUGLOC: [[EXIT]]: -; DEBUGLOC-NEXT: ret void, !dbg [[DBG110:![0-9]+]] +; DEBUGLOC-NEXT: ret void, !dbg [[DBG106:![0-9]+]] ; entry: br label %loop @@ -618,23 +590,21 @@ exit: ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} -; CHECK: [[META4]] = !{!"llvm.loop.vectorize.width", i32 4} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]} ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]} -; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} -; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]} ;. ; DEBUGLOC: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) ; DEBUGLOC: [[META1]] = !DIFile(filename: "{{.*}}<stdin>", directory: {{.*}}) ; DEBUGLOC: [[DBG5]] = distinct !DISubprogram(name: "_Z3fooPf", linkageName: "_Z3fooPf", scope: null, file: [[META1]], line: 1, type: [[META6:![0-9]+]], scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META8:![0-9]+]]) ; DEBUGLOC: [[META6]] = !DISubroutineType(types: [[META7:![0-9]+]]) ; DEBUGLOC: [[META7]] = !{} -; DEBUGLOC: [[META8]] = !{[[META9]], [[META11]], [[META12]], [[META14]], [[META15]], [[META16]]} +; DEBUGLOC: [[META8]] = !{[[META9:![0-9]+]], [[META11:![0-9]+]], [[META12:![0-9]+]], [[META14:![0-9]+]], [[META15:![0-9]+]], [[META16:![0-9]+]]} ; DEBUGLOC: [[META9]] = !DILocalVariable(name: "1", scope: [[DBG5]], file: [[META1]], line: 2, type: [[META10:![0-9]+]]) ; DEBUGLOC: [[META10]] = !DIBasicType(name: "ty64", size: 64, encoding: DW_ATE_unsigned) ; DEBUGLOC: [[META11]] = !DILocalVariable(name: "2", scope: [[DBG5]], file: [[META1]], line: 3, type: [[META10]]) @@ -654,87 +624,83 @@ exit: ; DEBUGLOC: [[LOOP25]] = distinct !{[[LOOP25]], [[META26:![0-9]+]], [[META27:![0-9]+]]} ; DEBUGLOC: [[META26]] = !{!"llvm.loop.isvectorized", i32 1} ; DEBUGLOC: [[META27]] = !{!"llvm.loop.unroll.runtime.disable"} -; DEBUGLOC: [[DBG28]] = !DILocation(line: 7, column: 1, scope: [[DBG5]]) -; DEBUGLOC: [[DBG29]] = !DILocation(line: 8, column: 1, scope: [[DBG5]]) -; DEBUGLOC: [[LOOP30]] = distinct !{[[LOOP30]], [[META31:![0-9]+]]} -; DEBUGLOC: [[META31]] = !{!"llvm.loop.vectorize.width", i32 4} -; DEBUGLOC: [[DBG32]] = !DILocation(line: 10, column: 1, scope: [[DBG5]]) -; DEBUGLOC: [[DBG33]] = distinct !DISubprogram(name: "widen_ptr_induction_dbg", linkageName: "widen_ptr_induction_dbg", scope: null, file: [[META1]], line: 11, type: [[META6]], scopeLine: 11, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META34:![0-9]+]]) -; DEBUGLOC: [[META34]] = !{[[META35]], [[META36]], [[META37]]} -; DEBUGLOC: [[META35]] = !DILocalVariable(name: "7", scope: [[DBG33]], file: [[META1]], line: 12, type: [[META10]]) -; DEBUGLOC: [[META36]] = !DILocalVariable(name: "8", scope: [[DBG33]], file: [[META1]], line: 13, type: [[META10]]) -; DEBUGLOC: [[META37]] = !DILocalVariable(name: "9", scope: [[DBG33]], file: [[META1]], line: 15, type: [[META17]]) -; DEBUGLOC: [[DBG38]] = !DILocation(line: 11, column: 1, scope: [[DBG33]]) -; DEBUGLOC: [[DBG39]] = !DILocation(line: 12, column: 1, scope: [[DBG33]]) -; DEBUGLOC: [[DBG40]] = !DILocation(line: 14, column: 1, scope: [[DBG33]]) -; DEBUGLOC: [[DBG41]] = !DILocation(line: 16, column: 1, scope: [[DBG33]]) -; DEBUGLOC: [[LOOP42]] = distinct !{[[LOOP42]], [[META26]], [[META27]]} -; DEBUGLOC: [[DBG43]] = !DILocation(line: 13, column: 1, scope: [[DBG33]]) -; DEBUGLOC: [[DBG44]] = !DILocation(line: 15, column: 1, scope: [[DBG33]]) -; DEBUGLOC: [[LOOP45]] = distinct !{[[LOOP45]], [[META27]], [[META26]]} -; DEBUGLOC: [[DBG46]] = !DILocation(line: 17, column: 1, scope: [[DBG33]]) -; DEBUGLOC: [[DBG47]] = distinct !DISubprogram(name: "predicated_phi_dbg", linkageName: "predicated_phi_dbg", scope: null, file: [[META1]], line: 18, type: [[META6]], scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META48:![0-9]+]]) -; DEBUGLOC: [[META48]] = !{[[META49]], [[META50]], [[META51]], [[META52]], [[META53]], [[META54]], [[META55]]} -; DEBUGLOC: [[META49]] = !DILocalVariable(name: "10", scope: [[DBG47]], file: [[META1]], line: 19, type: [[META10]]) -; DEBUGLOC: [[META50]] = !DILocalVariable(name: "11", scope: [[DBG47]], file: [[META1]], line: 20, type: [[META17]]) -; DEBUGLOC: [[META51]] = !DILocalVariable(name: "12", scope: [[DBG47]], file: [[META1]], line: 22, type: [[META10]]) -; DEBUGLOC: [[META52]] = !DILocalVariable(name: "13", scope: [[DBG47]], file: [[META1]], line: 24, type: [[META10]]) -; DEBUGLOC: [[META53]] = !DILocalVariable(name: "14", scope: [[DBG47]], file: [[META1]], line: 25, type: [[META10]]) -; DEBUGLOC: [[META54]] = !DILocalVariable(name: "15", scope: [[DBG47]], file: [[META1]], line: 27, type: [[META10]]) -; DEBUGLOC: [[META55]] = !DILocalVariable(name: "16", scope: [[DBG47]], file: [[META1]], line: 28, type: [[META17]]) -; DEBUGLOC: [[DBG56]] = !DILocation(line: 18, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG57]] = !DILocation(line: 19, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG58]] = !DILocation(line: 20, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG59]] = !DILocation(line: 22, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG60]] = !DILocation(line: 24, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG61]] = !DILocation(line: 25, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG62]] = !DILocation(line: 26, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG63]] = !DILocation(line: 29, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[LOOP64]] = distinct !{[[LOOP64]], [[META26]], [[META27]]} -; DEBUGLOC: [[DBG65]] = !DILocation(line: 21, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG66]] = !DILocation(line: 23, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG67]] = !DILocation(line: 27, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG68]] = !DILocation(line: 28, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[LOOP69]] = distinct !{[[LOOP69]], [[META27]], [[META26]]} -; DEBUGLOC: [[DBG70]] = !DILocation(line: 30, column: 1, scope: [[DBG47]]) -; DEBUGLOC: [[DBG71]] = distinct !DISubprogram(name: "scalar_cast_dbg", linkageName: "scalar_cast_dbg", scope: null, file: [[META1]], line: 31, type: [[META6]], scopeLine: 31, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META72:![0-9]+]]) -; DEBUGLOC: [[META72]] = !{[[META73]], [[META74]], [[META75]], [[META76]], [[META77]]} -; DEBUGLOC: [[META73]] = !DILocalVariable(name: "17", scope: [[DBG71]], file: [[META1]], line: 32, type: [[META10]]) -; DEBUGLOC: [[META74]] = !DILocalVariable(name: "18", scope: [[DBG71]], file: [[META1]], line: 33, type: [[META13]]) -; DEBUGLOC: [[META75]] = !DILocalVariable(name: "19", scope: [[DBG71]], file: [[META1]], line: 34, type: [[META10]]) -; DEBUGLOC: [[META76]] = !DILocalVariable(name: "20", scope: [[DBG71]], file: [[META1]], line: 36, type: [[META10]]) -; DEBUGLOC: [[META77]] = !DILocalVariable(name: "21", scope: [[DBG71]], file: [[META1]], line: 37, type: [[META17]]) -; DEBUGLOC: [[DBG78]] = !DILocation(line: 31, column: 1, scope: [[DBG71]]) -; DEBUGLOC: [[DBG79]] = !DILocation(line: 32, column: 1, scope: [[DBG71]]) -; DEBUGLOC: [[DBG80]] = !DILocation(line: 33, column: 1, scope: [[DBG71]]) -; DEBUGLOC: [[DBG81]] = !DILocation(line: 34, column: 1, scope: [[DBG71]]) -; DEBUGLOC: [[DBG82]] = !DILocation(line: 35, column: 1, scope: [[DBG71]]) -; DEBUGLOC: [[DBG83]] = !DILocation(line: 38, column: 1, scope: [[DBG71]]) -; DEBUGLOC: [[LOOP84]] = distinct !{[[LOOP84]], [[META26]], [[META27]]} -; DEBUGLOC: [[DBG85]] = !DILocation(line: 36, column: 1, scope: [[DBG71]]) -; DEBUGLOC: [[DBG86]] = !DILocation(line: 37, column: 1, scope: [[DBG71]]) -; DEBUGLOC: [[LOOP87]] = distinct !{[[LOOP87]], [[META26]]} -; DEBUGLOC: [[DBG88]] = !DILocation(line: 39, column: 1, scope: [[DBG71]]) -; DEBUGLOC: [[DBG89]] = distinct !DISubprogram(name: "widen_intrinsic_dbg", linkageName: "widen_intrinsic_dbg", scope: null, file: [[META1]], line: 40, type: [[META6]], scopeLine: 40, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META90:![0-9]+]]) -; DEBUGLOC: [[META90]] = !{[[META91]], [[META92]], [[META93]], [[META94]], [[META95]], [[META96]], [[META97]]} -; DEBUGLOC: [[META91]] = !DILocalVariable(name: "22", scope: [[DBG89]], file: [[META1]], line: 41, type: [[META10]]) -; DEBUGLOC: [[META92]] = !DILocalVariable(name: "23", scope: [[DBG89]], file: [[META1]], line: 42, type: [[META10]]) -; DEBUGLOC: [[META93]] = !DILocalVariable(name: "24", scope: [[DBG89]], file: [[META1]], line: 43, type: [[META13]]) -; DEBUGLOC: [[META94]] = !DILocalVariable(name: "25", scope: [[DBG89]], file: [[META1]], line: 44, type: [[META13]]) -; DEBUGLOC: [[META95]] = !DILocalVariable(name: "26", scope: [[DBG89]], file: [[META1]], line: 45, type: [[META10]]) -; DEBUGLOC: [[META96]] = !DILocalVariable(name: "27", scope: [[DBG89]], file: [[META1]], line: 47, type: [[META10]]) -; DEBUGLOC: [[META97]] = !DILocalVariable(name: "28", scope: [[DBG89]], file: [[META1]], line: 48, type: [[META17]]) -; DEBUGLOC: [[DBG98]] = !DILocation(line: 40, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[DBG99]] = !DILocation(line: 41, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[DBG100]] = !DILocation(line: 42, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[DBG101]] = !DILocation(line: 43, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[DBG102]] = !DILocation(line: 44, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[DBG103]] = !DILocation(line: 45, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[DBG104]] = !DILocation(line: 46, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[DBG105]] = !DILocation(line: 49, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[LOOP106]] = distinct !{[[LOOP106]], [[META26]], [[META27]]} -; DEBUGLOC: [[DBG107]] = !DILocation(line: 47, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[DBG108]] = !DILocation(line: 48, column: 1, scope: [[DBG89]]) -; DEBUGLOC: [[LOOP109]] = distinct !{[[LOOP109]], [[META26]]} -; DEBUGLOC: [[DBG110]] = !DILocation(line: 50, column: 1, scope: [[DBG89]]) +; DEBUGLOC: [[DBG28]] = !DILocation(line: 10, column: 1, scope: [[DBG5]]) +; DEBUGLOC: [[DBG29]] = distinct !DISubprogram(name: "widen_ptr_induction_dbg", linkageName: "widen_ptr_induction_dbg", scope: null, file: [[META1]], line: 11, type: [[META6]], scopeLine: 11, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META30:![0-9]+]]) +; DEBUGLOC: [[META30]] = !{[[META31]], [[META32]], [[META33]]} +; DEBUGLOC: [[META31]] = !DILocalVariable(name: "7", scope: [[DBG29]], file: [[META1]], line: 12, type: [[META10]]) +; DEBUGLOC: [[META32]] = !DILocalVariable(name: "8", scope: [[DBG29]], file: [[META1]], line: 13, type: [[META10]]) +; DEBUGLOC: [[META33]] = !DILocalVariable(name: "9", scope: [[DBG29]], file: [[META1]], line: 15, type: [[META17]]) +; DEBUGLOC: [[DBG34]] = !DILocation(line: 11, column: 1, scope: [[DBG29]]) +; DEBUGLOC: [[DBG35]] = !DILocation(line: 12, column: 1, scope: [[DBG29]]) +; DEBUGLOC: [[DBG36]] = !DILocation(line: 14, column: 1, scope: [[DBG29]]) +; DEBUGLOC: [[DBG37]] = !DILocation(line: 16, column: 1, scope: [[DBG29]]) +; DEBUGLOC: [[LOOP38]] = distinct !{[[LOOP38]], [[META26]], [[META27]]} +; DEBUGLOC: [[DBG39]] = !DILocation(line: 13, column: 1, scope: [[DBG29]]) +; DEBUGLOC: [[DBG40]] = !DILocation(line: 15, column: 1, scope: [[DBG29]]) +; DEBUGLOC: [[LOOP41]] = distinct !{[[LOOP41]], [[META27]], [[META26]]} +; DEBUGLOC: [[DBG42]] = !DILocation(line: 17, column: 1, scope: [[DBG29]]) +; DEBUGLOC: [[DBG43]] = distinct !DISubprogram(name: "predicated_phi_dbg", linkageName: "predicated_phi_dbg", scope: null, file: [[META1]], line: 18, type: [[META6]], scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META44:![0-9]+]]) +; DEBUGLOC: [[META44]] = !{[[META45]], [[META46]], [[META47]], [[META48]], [[META49]], [[META50]], [[META51]]} +; DEBUGLOC: [[META45]] = !DILocalVariable(name: "10", scope: [[DBG43]], file: [[META1]], line: 19, type: [[META10]]) +; DEBUGLOC: [[META46]] = !DILocalVariable(name: "11", scope: [[DBG43]], file: [[META1]], line: 20, type: [[META17]]) +; DEBUGLOC: [[META47]] = !DILocalVariable(name: "12", scope: [[DBG43]], file: [[META1]], line: 22, type: [[META10]]) +; DEBUGLOC: [[META48]] = !DILocalVariable(name: "13", scope: [[DBG43]], file: [[META1]], line: 24, type: [[META10]]) +; DEBUGLOC: [[META49]] = !DILocalVariable(name: "14", scope: [[DBG43]], file: [[META1]], line: 25, type: [[META10]]) +; DEBUGLOC: [[META50]] = !DILocalVariable(name: "15", scope: [[DBG43]], file: [[META1]], line: 27, type: [[META10]]) +; DEBUGLOC: [[META51]] = !DILocalVariable(name: "16", scope: [[DBG43]], file: [[META1]], line: 28, type: [[META17]]) +; DEBUGLOC: [[DBG52]] = !DILocation(line: 18, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG53]] = !DILocation(line: 19, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG54]] = !DILocation(line: 20, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG55]] = !DILocation(line: 22, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG56]] = !DILocation(line: 24, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG57]] = !DILocation(line: 25, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG58]] = !DILocation(line: 26, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG59]] = !DILocation(line: 29, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[LOOP60]] = distinct !{[[LOOP60]], [[META26]], [[META27]]} +; DEBUGLOC: [[DBG61]] = !DILocation(line: 21, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG62]] = !DILocation(line: 23, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG63]] = !DILocation(line: 27, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG64]] = !DILocation(line: 28, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[LOOP65]] = distinct !{[[LOOP65]], [[META27]], [[META26]]} +; DEBUGLOC: [[DBG66]] = !DILocation(line: 30, column: 1, scope: [[DBG43]]) +; DEBUGLOC: [[DBG67]] = distinct !DISubprogram(name: "scalar_cast_dbg", linkageName: "scalar_cast_dbg", scope: null, file: [[META1]], line: 31, type: [[META6]], scopeLine: 31, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META68:![0-9]+]]) +; DEBUGLOC: [[META68]] = !{[[META69]], [[META70]], [[META71]], [[META72]], [[META73]]} +; DEBUGLOC: [[META69]] = !DILocalVariable(name: "17", scope: [[DBG67]], file: [[META1]], line: 32, type: [[META10]]) +; DEBUGLOC: [[META70]] = !DILocalVariable(name: "18", scope: [[DBG67]], file: [[META1]], line: 33, type: [[META13]]) +; DEBUGLOC: [[META71]] = !DILocalVariable(name: "19", scope: [[DBG67]], file: [[META1]], line: 34, type: [[META10]]) +; DEBUGLOC: [[META72]] = !DILocalVariable(name: "20", scope: [[DBG67]], file: [[META1]], line: 36, type: [[META10]]) +; DEBUGLOC: [[META73]] = !DILocalVariable(name: "21", scope: [[DBG67]], file: [[META1]], line: 37, type: [[META17]]) +; DEBUGLOC: [[DBG74]] = !DILocation(line: 31, column: 1, scope: [[DBG67]]) +; DEBUGLOC: [[DBG75]] = !DILocation(line: 32, column: 1, scope: [[DBG67]]) +; DEBUGLOC: [[DBG76]] = !DILocation(line: 33, column: 1, scope: [[DBG67]]) +; DEBUGLOC: [[DBG77]] = !DILocation(line: 34, column: 1, scope: [[DBG67]]) +; DEBUGLOC: [[DBG78]] = !DILocation(line: 35, column: 1, scope: [[DBG67]]) +; DEBUGLOC: [[DBG79]] = !DILocation(line: 38, column: 1, scope: [[DBG67]]) +; DEBUGLOC: [[LOOP80]] = distinct !{[[LOOP80]], [[META26]], [[META27]]} +; DEBUGLOC: [[DBG81]] = !DILocation(line: 36, column: 1, scope: [[DBG67]]) +; DEBUGLOC: [[DBG82]] = !DILocation(line: 37, column: 1, scope: [[DBG67]]) +; DEBUGLOC: [[LOOP83]] = distinct !{[[LOOP83]], [[META26]]} +; DEBUGLOC: [[DBG84]] = !DILocation(line: 39, column: 1, scope: [[DBG67]]) +; DEBUGLOC: [[DBG85]] = distinct !DISubprogram(name: "widen_intrinsic_dbg", linkageName: "widen_intrinsic_dbg", scope: null, file: [[META1]], line: 40, type: [[META6]], scopeLine: 40, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META86:![0-9]+]]) +; DEBUGLOC: [[META86]] = !{[[META87]], [[META88]], [[META89]], [[META90]], [[META91]], [[META92]], [[META93]]} +; DEBUGLOC: [[META87]] = !DILocalVariable(name: "22", scope: [[DBG85]], file: [[META1]], line: 41, type: [[META10]]) +; DEBUGLOC: [[META88]] = !DILocalVariable(name: "23", scope: [[DBG85]], file: [[META1]], line: 42, type: [[META10]]) +; DEBUGLOC: [[META89]] = !DILocalVariable(name: "24", scope: [[DBG85]], file: [[META1]], line: 43, type: [[META13]]) +; DEBUGLOC: [[META90]] = !DILocalVariable(name: "25", scope: [[DBG85]], file: [[META1]], line: 44, type: [[META13]]) +; DEBUGLOC: [[META91]] = !DILocalVariable(name: "26", scope: [[DBG85]], file: [[META1]], line: 45, type: [[META10]]) +; DEBUGLOC: [[META92]] = !DILocalVariable(name: "27", scope: [[DBG85]], file: [[META1]], line: 47, type: [[META10]]) +; DEBUGLOC: [[META93]] = !DILocalVariable(name: "28", scope: [[DBG85]], file: [[META1]], line: 48, type: [[META17]]) +; DEBUGLOC: [[DBG94]] = !DILocation(line: 40, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[DBG95]] = !DILocation(line: 41, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[DBG96]] = !DILocation(line: 42, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[DBG97]] = !DILocation(line: 43, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[DBG98]] = !DILocation(line: 44, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[DBG99]] = !DILocation(line: 45, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[DBG100]] = !DILocation(line: 46, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[DBG101]] = !DILocation(line: 49, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[LOOP102]] = distinct !{[[LOOP102]], [[META26]], [[META27]]} +; DEBUGLOC: [[DBG103]] = !DILocation(line: 47, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[DBG104]] = !DILocation(line: 48, column: 1, scope: [[DBG85]]) +; DEBUGLOC: [[LOOP105]] = distinct !{[[LOOP105]], [[META26]]} +; DEBUGLOC: [[DBG106]] = !DILocation(line: 50, column: 1, scope: [[DBG85]]) ;. diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll index 57f0dc2..787fa31 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-reduction-inloop.ll @@ -22,7 +22,7 @@ loop: %load = load i32, ptr %gep, align 4 %red.next = add i32 %red, %load %iv.next = add i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 256 + %exitcond = icmp eq i64 %iv.next, 257 br i1 %exitcond, label %exit, label %loop exit: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll index f20d492..73ddddc 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll @@ -20,10 +20,6 @@ define i32 @reduction_smin(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; @@ -66,10 +62,6 @@ define i32 @reduction_smin_select_ops_flipped(ptr nocapture %A, ptr nocapture %B ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; @@ -111,10 +103,6 @@ define i32 @reduction_smin_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP3]] @@ -159,10 +147,6 @@ define i32 @reduction_umax(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; @@ -205,10 +189,6 @@ define i32 @reduction_umax_select_ops_flipped(ptr nocapture %A, ptr nocapture %B ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; @@ -250,10 +230,6 @@ define i32 @reduction_umax_intrinsic(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: ret i32 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index 925290b..1b9dcad 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -61,11 +61,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP26]] ; @@ -170,11 +166,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP48]] ; @@ -263,11 +255,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP29]] ; @@ -373,11 +361,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP48]] ; @@ -485,11 +469,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP46]] ; @@ -594,11 +574,7 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: ret i32 [[TMP45]] ; @@ -701,11 +677,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[TMP45]] ; @@ -806,11 +778,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[TMP43]] ; @@ -911,11 +879,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[TMP43]] ; @@ -1016,11 +980,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret float [[TMP43]] ; @@ -1123,11 +1083,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret float [[TMP45]] ; @@ -1211,11 +1167,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; @@ -1297,11 +1249,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[RDX_MINMAX]] ; @@ -1356,21 +1304,7 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then8: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else: -; CHECK-NEXT: br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]] -; CHECK: if.then16: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.end: ; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]]) ; CHECK-NEXT: ret float [[SUM_1_LCSSA]] @@ -1478,11 +1412,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP30]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP33:%.*]] = trunc <4 x i32> [[TMP32]] to <4 x i8> @@ -1572,11 +1502,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP29]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP32:%.*]] = trunc <4 x i32> [[TMP31]] to <4 x i8> diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll index cad3ca1..183462f 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll @@ -35,11 +35,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP7]], [[TMP5]] ; CHECK-NEXT: [[BIN_RDX7:%.*]] = add i32 [[TMP9]], [[BIN_RDX]] @@ -114,11 +110,7 @@ define i64 @reduction_sum_chain(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i64 [[TMP19]], [[TMP17]] ; CHECK-NEXT: [[BIN_RDX11:%.*]] = add i64 [[TMP21]], [[BIN_RDX]] @@ -345,11 +337,7 @@ define i32 @predicated(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP111:%.*]] = icmp eq i64 [[INDEX_NEXT]], 272 ; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP104]], [[TMP101]] ; CHECK-NEXT: [[BIN_RDX34:%.*]] = add i32 [[TMP107]], [[BIN_RDX]] @@ -581,17 +569,9 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP119:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.end: ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP112]], [[TMP109]] ; CHECK-NEXT: [[BIN_RDX36:%.*]] = mul i32 [[TMP115]], [[BIN_RDX]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index f4d4cca..ec7fde8 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -23,21 +23,8 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-NEXT: [[L7]] = add i32 [[SUM_02]], [[L3]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L7]], %[[DOTLR_PH]] ], [ [[TMP2]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP2]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_single( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]]) { @@ -61,22 +48,9 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP5]], [[TMP3]] +; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = add i32 [[TMP5]], [[TMP3]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L7]] = add i32 [[SUM_02]], [[L3]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK-INTERLEAVED: [[__CRIT_EDGE:.*:]] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L7]], %[[DOTLR_PH]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -125,26 +99,8 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-NEXT: [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L6]] -; CHECK-NEXT: [[L8:%.*]] = add i32 [[L7]], [[L3]] -; CHECK-NEXT: [[L9]] = add i32 [[L8]], [[L5]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP7]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { @@ -183,27 +139,9 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP13]] +; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = add i32 [[TMP15]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-INTERLEAVED-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L6]] -; CHECK-INTERLEAVED-NEXT: [[L8:%.*]] = add i32 [[L7]], [[L3]] -; CHECK-INTERLEAVED-NEXT: [[L9]] = add i32 [[L8]], [[L5]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK-INTERLEAVED: [[__CRIT_EDGE:.*:]] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -251,22 +189,8 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L3]] -; CHECK-NEXT: [[L9]] = add i32 [[L7]], 3 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP3]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_const( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]]) { @@ -294,23 +218,9 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP7]], [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = add i32 [[TMP7]], [[TMP6]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L3]] -; CHECK-INTERLEAVED-NEXT: [[L9]] = add i32 [[L7]], 3 -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK-INTERLEAVED: [[__CRIT_EDGE:.*:]] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -360,26 +270,8 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-NEXT: [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[L7:%.*]] = mul i32 [[PROD_02]], [[L6]] -; CHECK-NEXT: [[L8:%.*]] = mul i32 [[L7]], [[L3]] -; CHECK-NEXT: [[L9]] = mul i32 [[L8]], [[L5]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP7]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_prod( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { @@ -418,27 +310,9 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP15]], [[TMP13]] +; CHECK-INTERLEAVED-NEXT: [[PROD_0_LCSSA:%.*]] = mul i32 [[TMP15]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 1, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-INTERLEAVED-NEXT: [[L7:%.*]] = mul i32 [[PROD_02]], [[L6]] -; CHECK-INTERLEAVED-NEXT: [[L8:%.*]] = mul i32 [[L7]], [[L3]] -; CHECK-INTERLEAVED-NEXT: [[L9]] = mul i32 [[L8]], [[L5]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK-INTERLEAVED: [[__CRIT_EDGE:.*:]] -; CHECK-INTERLEAVED-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[PROD_0_LCSSA]] ; entry: @@ -491,26 +365,8 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-NEXT: [[L6:%.*]] = mul nsw i32 [[L5]], [[L3]] -; CHECK-NEXT: [[L7:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[L8:%.*]] = add i32 [[SUM_02]], [[L7]] -; CHECK-NEXT: [[L9]] = add i32 [[L8]], [[L6]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP6]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_mix( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { @@ -547,27 +403,9 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP10]] +; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = add i32 [[TMP13]], [[TMP10]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L6:%.*]] = mul nsw i32 [[L5]], [[L3]] -; CHECK-INTERLEAVED-NEXT: [[L7:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-INTERLEAVED-NEXT: [[L8:%.*]] = add i32 [[SUM_02]], [[L7]] -; CHECK-INTERLEAVED-NEXT: [[L9]] = add i32 [[L8]], [[L6]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK-INTERLEAVED: [[__CRIT_EDGE:.*:]] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -617,24 +455,8 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-NEXT: [[L6:%.*]] = mul i32 [[SUM_02]], [[L3]] -; CHECK-NEXT: [[L7]] = mul i32 [[L6]], [[L5]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L7]], %[[DOTLR_PH]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP5]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_mul( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { @@ -666,25 +488,9 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP11]], [[TMP9]] +; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = mul i32 [[TMP11]], [[TMP9]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L7:%.*]], %[[DOTLR_PH]] ], [ 19, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L6:%.*]] = mul i32 [[SUM_02]], [[L3]] -; CHECK-INTERLEAVED-NEXT: [[L7]] = mul i32 [[L6]], [[L5]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK-INTERLEAVED: [[__CRIT_EDGE:.*:]] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L7]], %[[DOTLR_PH]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -731,24 +537,8 @@ define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocap ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_09:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ 120, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[COEFF]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[L1]], [[L0]] -; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[SUM_09]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP4]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @start_at_non_zero( ; CHECK-INTERLEAVED-SAME: ptr captures(none) [[IN:%.*]], ptr captures(none) [[COEFF:%.*]], ptr captures(none) [[OUT:%.*]]) { @@ -780,24 +570,8 @@ define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocap ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP9]], [[TMP6]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_09:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ 120, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[COEFF]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = mul nsw i32 [[L1]], [[L0]] -; CHECK-INTERLEAVED-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[SUM_09]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] -; CHECK-INTERLEAVED-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-INTERLEAVED-NEXT: ret i32 [[BIN_RDX]] ; entry: br label %for.body @@ -844,24 +618,8 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[AND:%.*]], %[[FOR_BODY]] ], [ -1, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = and i32 [[RESULT_08]], [[L0]] -; CHECK-NEXT: [[AND]] = and i32 [[ADD]], [[L1]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[AND]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP5]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_and( ; CHECK-INTERLEAVED-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) { @@ -893,25 +651,9 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = and i32 [[TMP11]], [[TMP9]] +; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = and i32 [[TMP11]], [[TMP9]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[AND:%.*]], %[[FOR_BODY]] ], [ -1, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ADD:%.*]] = and i32 [[RESULT_08]], [[L0]] -; CHECK-INTERLEAVED-NEXT: [[AND]] = and i32 [[ADD]], [[L1]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[AND]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -958,24 +700,8 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] -; CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[OR]], %[[FOR_BODY]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP4]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_or( ; CHECK-INTERLEAVED-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) { @@ -1005,25 +731,9 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = or i32 [[TMP9]], [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = or i32 [[TMP9]], [[TMP7]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] -; CHECK-INTERLEAVED-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[OR]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -1070,24 +780,8 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[XOR:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] -; CHECK-NEXT: [[XOR]] = xor i32 [[ADD]], [[RESULT_08]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[XOR]], %[[FOR_BODY]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP4]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_xor( ; CHECK-INTERLEAVED-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) { @@ -1117,25 +811,9 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = xor i32 [[TMP9]], [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = xor i32 [[TMP9]], [[TMP7]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[XOR:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] -; CHECK-INTERLEAVED-NEXT: [[XOR]] = xor i32 [[ADD]], [[RESULT_08]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[XOR]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: @@ -1183,24 +861,8 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FADD:%.*]], %[[FOR_BODY]] ], [ 0.000000e+00, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[RESULT_08]], [[L0]] -; CHECK-NEXT: [[FADD]] = fadd fast float [[ADD]], [[L1]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ [[FADD]], %[[FOR_BODY]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret float [[TMP3]] ; ; CHECK-INTERLEAVED-LABEL: define float @reduction_fadd( ; CHECK-INTERLEAVED-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) { @@ -1232,25 +894,9 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = fadd fast float [[TMP7]], [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = fadd fast float [[TMP7]], [[TMP6]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[RESULT_08:%.*]] = phi float [ [[FADD:%.*]], %[[FOR_BODY]] ], [ 0.000000e+00, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ADD:%.*]] = fadd fast float [[RESULT_08]], [[L0]] -; CHECK-INTERLEAVED-NEXT: [[FADD]] = fadd fast float [[ADD]], [[L1]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ [[FADD]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret float [[RESULT_0_LCSSA]] ; entry: @@ -1298,24 +944,8 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FMUL:%.*]], %[[FOR_BODY]] ], [ 0.000000e+00, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fmul fast float [[RESULT_08]], [[L0]] -; CHECK-NEXT: [[FMUL]] = fmul fast float [[ADD]], [[L1]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ [[FMUL]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; CHECK-NEXT: ret float [[TMP5]] ; ; CHECK-INTERLEAVED-LABEL: define float @reduction_fmul( ; CHECK-INTERLEAVED-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) { @@ -1347,25 +977,9 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = fmul fast float [[TMP11]], [[TMP9]] +; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = fmul fast float [[TMP11]], [[TMP9]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[RESULT_08:%.*]] = phi float [ [[FMUL:%.*]], %[[FOR_BODY]] ], [ 0.000000e+00, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ADD:%.*]] = fmul fast float [[RESULT_08]], [[L0]] -; CHECK-INTERLEAVED-NEXT: [[FMUL]] = fmul fast float [[ADD]], [[L1]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ [[FMUL]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret float [[RESULT_0_LCSSA]] ; entry: @@ -1410,21 +1024,8 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) { ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[SUB:%.*]], %[[FOR_BODY]] ], [ 3, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[SUB]] = sub nsw i32 [[X_05]], [[L0]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ [[SUB]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[X_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP5]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sub_lhs( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]]) { @@ -1450,21 +1051,8 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP5]], [[TMP7]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[X_05:%.*]] = phi i32 [ [[SUB:%.*]], %[[FOR_BODY]] ], [ 3, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[SUB]] = sub nsw i32 [[X_05]], [[L0]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ [[SUB]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] -; CHECK-INTERLEAVED-NEXT: ret i32 [[X_0_LCSSA]] +; CHECK-INTERLEAVED-NEXT: ret i32 [[BIN_RDX]] ; entry: br label %for.body @@ -1519,38 +1107,8 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]]) ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ] -; CHECK-NEXT: [[SUM_033:%.*]] = phi float [ [[S]], %[[SCALAR_PH]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[L0]], [[L1]] -; CHECK-NEXT: br i1 [[CMP3]], label %[[IF_THEN:.*]], label %[[FOR_INC]] -; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[CMP6:%.*]] = fcmp ogt float [[L1]], 1.000000e+00 -; CHECK-NEXT: br i1 [[CMP6]], label %[[IF_THEN8:.*]], label %[[IF_ELSE:.*]] -; CHECK: [[IF_THEN8]]: -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_033]], [[L0]] -; CHECK-NEXT: br label %[[FOR_INC]] -; CHECK: [[IF_ELSE]]: -; CHECK-NEXT: [[CMP14:%.*]] = fcmp ogt float [[L0]], 2.000000e+00 -; CHECK-NEXT: br i1 [[CMP14]], label %[[IF_THEN16:.*]], label %[[FOR_INC]] -; CHECK: [[IF_THEN16]]: -; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[SUM_033]], [[L1]] -; CHECK-NEXT: br label %[[FOR_INC]] -; CHECK: [[FOR_INC]]: -; CHECK-NEXT: [[SUM_1]] = phi float [ [[ADD]], %[[IF_THEN8]] ], [ [[ADD19]], %[[IF_THEN16]] ], [ [[SUM_033]], %[[IF_ELSE]] ], [ [[SUM_033]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], 128 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[SUM_1_LCSSA]] +; CHECK-NEXT: ret float [[TMP13]] ; ; CHECK-INTERLEAVED-LABEL: define float @reduction_conditional( ; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], float [[S:%.*]]) { @@ -1602,38 +1160,8 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[PREDPHI9]], [[PREDPHI6]] ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_033:%.*]] = phi float [ [[S]], %[[SCALAR_PH]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[CMP3:%.*]] = fcmp ogt float [[L0]], [[L1]] -; CHECK-INTERLEAVED-NEXT: br i1 [[CMP3]], label %[[IF_THEN:.*]], label %[[FOR_INC]] -; CHECK-INTERLEAVED: [[IF_THEN]]: -; CHECK-INTERLEAVED-NEXT: [[CMP6:%.*]] = fcmp ogt float [[L1]], 1.000000e+00 -; CHECK-INTERLEAVED-NEXT: br i1 [[CMP6]], label %[[IF_THEN8:.*]], label %[[IF_ELSE:.*]] -; CHECK-INTERLEAVED: [[IF_THEN8]]: -; CHECK-INTERLEAVED-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_033]], [[L0]] -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_INC]] -; CHECK-INTERLEAVED: [[IF_ELSE]]: -; CHECK-INTERLEAVED-NEXT: [[CMP14:%.*]] = fcmp ogt float [[L0]], 2.000000e+00 -; CHECK-INTERLEAVED-NEXT: br i1 [[CMP14]], label %[[IF_THEN16:.*]], label %[[FOR_INC]] -; CHECK-INTERLEAVED: [[IF_THEN16]]: -; CHECK-INTERLEAVED-NEXT: [[ADD19:%.*]] = fadd fast float [[SUM_033]], [[L1]] -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_INC]] -; CHECK-INTERLEAVED: [[FOR_INC]]: -; CHECK-INTERLEAVED-NEXT: [[SUM_1]] = phi float [ [[ADD]], %[[IF_THEN8]] ], [ [[ADD19]], %[[IF_THEN16]] ], [ [[SUM_033]], %[[IF_ELSE]] ], [ [[SUM_033]], %[[FOR_BODY]] ] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], 128 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ], [ [[TMP24]], %[[MIDDLE_BLOCK]] ] -; CHECK-INTERLEAVED-NEXT: ret float [[SUM_1_LCSSA]] +; CHECK-INTERLEAVED-NEXT: ret float [[TMP24]] ; entry: br label %for.body @@ -1679,11 +1207,11 @@ for.end: define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[_LR_PH1:.*]]: ; CHECK-NEXT: br label %[[DOTLR_PH:.*]] ; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1703,11 +1231,11 @@ define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocaptu ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_multiuse( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { -; CHECK-INTERLEAVED-NEXT: [[ENTRY:.*]]: +; CHECK-INTERLEAVED-NEXT: [[_LR_PH1:.*]]: ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] ; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[ENTRY]] ] +; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] +; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ] ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -1778,26 +1306,8 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-NEXT: [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L6]] -; CHECK-NEXT: [[L8:%.*]] = add i32 [[L7]], [[L3]] -; CHECK-NEXT: [[L9]] = add i32 [[L8]], [[L5]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP7]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_predicated( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { @@ -1836,27 +1346,9 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: -; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP13]] +; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = add i32 [[TMP15]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L5:%.*]] = load i32, ptr [[L4]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-INTERLEAVED-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L6]] -; CHECK-INTERLEAVED-NEXT: [[L8:%.*]] = add i32 [[L7]], [[L3]] -; CHECK-INTERLEAVED-NEXT: [[L9]] = add i32 [[L8]], [[L5]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-INTERLEAVED: [[__CRIT_EDGE:.*:]] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -1902,27 +1394,13 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP5]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 -; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] -; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 -; CHECK-NEXT: [[L3E:%.*]] = zext i8 [[L3]] to i32 -; CHECK-NEXT: [[L9]] = add i32 [[SUM_02]], [[L3E]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[SUM_0_LCSSA1:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = trunc i32 [[SUM_0_LCSSA1]] to i8 +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = trunc i32 [[TMP8]] to i8 ; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]] ; ; CHECK-INTERLEAVED-LABEL: define i8 @reduction_add_trunc( @@ -1951,28 +1429,14 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP11]] = zext <4 x i8> [[TMP9]] to <4 x i32> ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <4 x i8> [[TMP9]], [[TMP8]] ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 -; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L3E:%.*]] = zext i8 [[L3]] to i32 -; CHECK-INTERLEAVED-NEXT: [[L9]] = add i32 [[SUM_02]], [[L3E]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK-INTERLEAVED: [[__CRIT_EDGE:.*:]] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA1:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[TMP14]], %[[MIDDLE_BLOCK]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = trunc i32 [[SUM_0_LCSSA1]] to i8 +; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = trunc i32 [[TMP14]] to i8 ; CHECK-INTERLEAVED-NEXT: ret i8 [[SUM_0_LCSSA]] ; entry: @@ -2016,27 +1480,13 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP5]] = zext <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 ; CHECK-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK: [[_LR_PH:.*:]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 -; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] -; CHECK-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 -; CHECK-NEXT: [[L3E:%.*]] = zext i8 [[L3]] to i32 -; CHECK-NEXT: [[L9]] = and i32 [[SUM_02]], [[L3E]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[SUM_0_LCSSA1:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = trunc i32 [[SUM_0_LCSSA1]] to i8 +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = trunc i32 [[TMP8]] to i8 ; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]] ; ; CHECK-INTERLEAVED-LABEL: define i8 @reduction_and_trunc( @@ -2065,28 +1515,14 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-INTERLEAVED-NEXT: [[TMP11]] = zext <4 x i8> [[TMP9]] to <4 x i32> ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = and <4 x i8> [[TMP9]], [[TMP8]] ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[BIN_RDX]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-INTERLEAVED-NEXT: br [[DOT_CRIT_EDGE:label %.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]] -; CHECK-INTERLEAVED: [[_LR_PH:.*:]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02P:%.*]] = phi i32 [ [[L9:%.*]], %[[DOTLR_PH]] ], [ 255, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = and i32 [[SUM_02P]], 255 -; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i8, ptr [[L2]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L3E:%.*]] = zext i8 [[L3]] to i32 -; CHECK-INTERLEAVED-NEXT: [[L9]] = and i32 [[SUM_02]], [[L3E]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], [[DOT_CRIT_EDGE]], label %[[DOTLR_PH]] ; CHECK-INTERLEAVED: [[__CRIT_EDGE:.*:]] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA1:%.*]] = phi i32 [ [[L9]], %[[DOTLR_PH]] ], [ [[TMP14]], %[[MIDDLE_BLOCK]] ] -; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = trunc i32 [[SUM_0_LCSSA1]] to i8 +; CHECK-INTERLEAVED-NEXT: [[SUM_0_LCSSA:%.*]] = trunc i32 [[TMP14]] to i8 ; CHECK-INTERLEAVED-NEXT: ret i8 [[SUM_0_LCSSA]] ; entry: @@ -2133,7 +1569,7 @@ define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP4]] = fadd float [[VEC_PHI]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] @@ -2151,7 +1587,7 @@ define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP6]], float [[TMP7]], float [[SUM_07]]) ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[MULADD_LCSSA]] @@ -2185,7 +1621,7 @@ define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-INTERLEAVED-NEXT: [[TMP9]] = fadd float [[VEC_PHI1]], [[TMP8]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = fadd float [[TMP9]], [[TMP7]] ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -2204,7 +1640,7 @@ define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-INTERLEAVED-NEXT: [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP11]], float [[TMP12]], float [[SUM_07]]) ; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-INTERLEAVED: [[FOR_END]]: ; CHECK-INTERLEAVED-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret float [[MULADD_LCSSA]] @@ -2373,7 +1809,7 @@ define float @reduction_fmuladd_blend(ptr %a, ptr %b, i64 %n, i1 %c) { ; CHECK-NEXT: [[TMP7]] = fadd float [[VEC_PHI]], [[TMP6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -2388,17 +1824,17 @@ define float @reduction_fmuladd_blend(ptr %a, ptr %b, i64 %n, i1 %c) { ; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] ; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: br i1 [[C]], label %[[FOO:.*]], label %[[BAR:.*]] -; CHECK: [[FOO]]: +; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK: [[IF]]: ; CHECK-NEXT: br label %[[LATCH]] -; CHECK: [[BAR]]: +; CHECK: [[ELSE]]: ; CHECK-NEXT: [[MULADD:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP9]], float [[TMP10]], float [[SUM]]) ; CHECK-NEXT: br label %[[LATCH]] ; CHECK: [[LATCH]]: -; CHECK-NEXT: [[SUM_NEXT]] = phi float [ [[SUM]], %[[FOO]] ], [ [[MULADD]], %[[BAR]] ] +; CHECK-NEXT: [[SUM_NEXT]] = phi float [ [[SUM]], %[[IF]] ], [ [[MULADD]], %[[ELSE]] ] ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi float [ [[SUM_NEXT]], %[[LATCH]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[SUM_NEXT_LCSSA]] @@ -2437,7 +1873,7 @@ define float @reduction_fmuladd_blend(ptr %a, ptr %b, i64 %n, i1 %c) { ; CHECK-INTERLEAVED-NEXT: [[TMP13]] = fadd float [[VEC_PHI1]], [[TMP12]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = fadd float [[TMP13]], [[TMP10]] ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -2463,7 +1899,7 @@ define float @reduction_fmuladd_blend(ptr %a, ptr %b, i64 %n, i1 %c) { ; CHECK-INTERLEAVED-NEXT: [[SUM_NEXT]] = phi float [ [[SUM]], %[[IF]] ], [ [[MULADD]], %[[ELSE]] ] ; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK-INTERLEAVED: [[EXIT]]: ; CHECK-INTERLEAVED-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi float [ [[SUM_NEXT]], %[[LATCH]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret float [[SUM_NEXT_LCSSA]] @@ -2524,7 +1960,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-NEXT: [[TMP7]] = add i32 [[VEC_PHI]], [[TMP6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END7:.*]], label %[[SCALAR_PH]] @@ -2550,7 +1986,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-NEXT: [[G_1]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[G_016]], %[[FOR_BODY2]] ] ; CHECK-NEXT: [[INC6]] = add nuw nsw i32 [[A_117]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END7]], label %[[FOR_BODY2]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END7]], label %[[FOR_BODY2]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: [[FOR_END7]]: ; CHECK-NEXT: [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], %[[FOR_INC5]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[G_1_LCSSA]] @@ -2590,7 +2026,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-INTERLEAVED-NEXT: [[TMP14]] = add i32 [[VEC_PHI1]], [[TMP13]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP14]], [[TMP11]] ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I]], [[N_VEC]] @@ -2617,7 +2053,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h ; CHECK-INTERLEAVED-NEXT: [[G_1]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[G_016]], %[[FOR_BODY2]] ] ; CHECK-INTERLEAVED-NEXT: [[INC6]] = add nuw nsw i32 [[A_117]], 1 ; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]] -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END7]], label %[[FOR_BODY2]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END7]], label %[[FOR_BODY2]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK-INTERLEAVED: [[FOR_END7]]: ; CHECK-INTERLEAVED-NEXT: [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], %[[FOR_INC5]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[G_1_LCSSA]] @@ -2680,7 +2116,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-NEXT: [[TMP11]] = add i32 [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END7:.*]], label %[[SCALAR_PH]] @@ -2707,7 +2143,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-NEXT: [[G_1]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[G_016]], %[[FOR_BODY2]] ] ; CHECK-NEXT: [[INC6]] = add nuw nsw i32 [[A_117]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END7]], label %[[FOR_BODY2]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END7]], label %[[FOR_BODY2]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: [[FOR_END7]]: ; CHECK-NEXT: [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], %[[FOR_INC5]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[G_1_LCSSA]] @@ -2753,7 +2189,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-INTERLEAVED-NEXT: [[TMP20]] = add i32 [[TMP14]], [[TMP19]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP20]], [[TMP17]] ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I]], [[N_VEC]] @@ -2781,7 +2217,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-INTERLEAVED-NEXT: [[G_1]] = phi i32 [ [[ADD]], %[[IF_THEN]] ], [ [[G_016]], %[[FOR_BODY2]] ] ; CHECK-INTERLEAVED-NEXT: [[INC6]] = add nuw nsw i32 [[A_117]], 1 ; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]] -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END7]], label %[[FOR_BODY2]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END7]], label %[[FOR_BODY2]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK-INTERLEAVED: [[FOR_END7]]: ; CHECK-INTERLEAVED-NEXT: [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], %[[FOR_INC5]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] ; CHECK-INTERLEAVED-NEXT: ret i32 [[G_1_LCSSA]] @@ -2890,34 +2326,11 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP48]] = add i32 [[VEC_PHI]], [[TMP47]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP49]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP49]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_COND_CLEANUP]]: -; CHECK-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], %[[FOR_INC:.*]] ], [ [[TMP48]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[A_1_LCSSA]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[G_09:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC3:%.*]], %[[FOR_INC]] ] -; CHECK-NEXT: [[A_08:%.*]] = phi i32 [ undef, %[[SCALAR_PH]] ], [ [[A_1]], %[[FOR_INC]] ] -; CHECK-NEXT: [[D:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[G_09]], i32 1 -; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[D]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP45]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[LOR_LHS_FALSE:.*]], label %[[IF_THEN:.*]] -; CHECK: [[LOR_LHS_FALSE]]: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[G_09]] -; CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL2_NOT:%.*]] = icmp eq i32 [[TMP46]], 0 -; CHECK-NEXT: br i1 [[TOBOOL2_NOT]], label %[[FOR_INC]], label %[[IF_THEN]] -; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[A_08]], 1 -; CHECK-NEXT: br label %[[FOR_INC]] -; CHECK: [[FOR_INC]]: -; CHECK-NEXT: [[A_1]] = phi i32 [ [[INC]], %[[IF_THEN]] ], [ [[A_08]], %[[LOR_LHS_FALSE]] ] -; CHECK-NEXT: [[INC3]] = add nuw nsw i32 [[G_09]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC3]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; CHECK-NEXT: ret i32 [[TMP48]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @predicated_or_dominates_reduction( ; CHECK-INTERLEAVED-SAME: ptr [[B:%.*]]) { @@ -3051,35 +2464,12 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP98]] = add i32 [[VEC_PHI1]], [[TMP97]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP99:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP99]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP99]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP98]], [[TMP94]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_COND_CLEANUP:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-INTERLEAVED: [[FOR_COND_CLEANUP]]: -; CHECK-INTERLEAVED-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], %[[FOR_INC:.*]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] -; CHECK-INTERLEAVED-NEXT: ret i32 [[A_1_LCSSA]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[G_09:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC3:%.*]], %[[FOR_INC]] ] -; CHECK-INTERLEAVED-NEXT: [[A_08:%.*]] = phi i32 [ undef, %[[SCALAR_PH]] ], [ [[A_1]], %[[FOR_INC]] ] -; CHECK-INTERLEAVED-NEXT: [[D:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[G_09]], i32 1 -; CHECK-INTERLEAVED-NEXT: [[TMP100:%.*]] = load i32, ptr [[D]], align 4 -; CHECK-INTERLEAVED-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP100]], 0 -; CHECK-INTERLEAVED-NEXT: br i1 [[TOBOOL_NOT]], label %[[LOR_LHS_FALSE:.*]], label %[[IF_THEN:.*]] -; CHECK-INTERLEAVED: [[LOR_LHS_FALSE]]: -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[G_09]] -; CHECK-INTERLEAVED-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[TOBOOL2_NOT:%.*]] = icmp eq i32 [[TMP101]], 0 -; CHECK-INTERLEAVED-NEXT: br i1 [[TOBOOL2_NOT]], label %[[FOR_INC]], label %[[IF_THEN]] -; CHECK-INTERLEAVED: [[IF_THEN]]: -; CHECK-INTERLEAVED-NEXT: [[INC:%.*]] = add nsw i32 [[A_08]], 1 -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_INC]] -; CHECK-INTERLEAVED: [[FOR_INC]]: -; CHECK-INTERLEAVED-NEXT: [[A_1]] = phi i32 [ [[INC]], %[[IF_THEN]] ], [ [[A_08]], %[[LOR_LHS_FALSE]] ] -; CHECK-INTERLEAVED-NEXT: [[INC3]] = add nuw nsw i32 [[G_09]], 1 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC3]], 1000 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; CHECK-INTERLEAVED-NEXT: ret i32 [[BIN_RDX]] ; entry: br label %for.body @@ -3135,27 +2525,11 @@ define i32 @reduction_add_sub(ptr noalias nocapture %A, ptr noalias nocapture %B ; CHECK-NEXT: [[TMP6]] = add i32 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[SUB:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[L0_B:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X_05]], [[L0]] -; CHECK-NEXT: [[SUB]] = sub nsw i32 [[ADD]], [[L0_B]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ [[SUB]], %[[FOR_BODY]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[X_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP6]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_add_sub( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { @@ -3187,28 +2561,12 @@ define i32 @reduction_add_sub(ptr noalias nocapture %A, ptr noalias nocapture %B ; CHECK-INTERLEAVED-NEXT: [[TMP13]] = add i32 [[TMP9]], [[TMP12]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP11]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[X_05:%.*]] = phi i32 [ [[SUB:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L0_B:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 -; CHECK-INTERLEAVED-NEXT: [[ADD:%.*]] = add nsw i32 [[X_05]], [[L0]] -; CHECK-INTERLEAVED-NEXT: [[SUB]] = sub nsw i32 [[ADD]], [[L0_B]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ [[SUB]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] -; CHECK-INTERLEAVED-NEXT: ret i32 [[X_0_LCSSA]] +; CHECK-INTERLEAVED-NEXT: ret i32 [[BIN_RDX]] ; entry: br label %for.body @@ -3254,27 +2612,11 @@ define i32 @reduction_sub_add(ptr noalias nocapture %A, ptr noalias nocapture %B ; CHECK-NEXT: [[TMP6]] = add i32 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[FOR_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[L0_B:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X_05]], [[L0]] -; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUB]], [[L0_B]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[X_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP6]] ; ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sub_add( ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) { @@ -3306,28 +2648,12 @@ define i32 @reduction_sub_add(ptr noalias nocapture %A, ptr noalias nocapture %B ; CHECK-INTERLEAVED-NEXT: [[TMP13]] = add i32 [[TMP9]], [[TMP12]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP11]] ; CHECK-INTERLEAVED-NEXT: br label %[[FOR_END:.*]] -; CHECK-INTERLEAVED: [[SCALAR_PH:.*]]: -; CHECK-INTERLEAVED-NEXT: br label %[[FOR_BODY:.*]] -; CHECK-INTERLEAVED: [[FOR_BODY]]: -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[X_05:%.*]] = phi i32 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-INTERLEAVED-NEXT: [[L0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-INTERLEAVED-NEXT: [[L0_B:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 -; CHECK-INTERLEAVED-NEXT: [[SUB:%.*]] = sub nsw i32 [[X_05]], [[L0]] -; CHECK-INTERLEAVED-NEXT: [[ADD]] = add nsw i32 [[SUB]], [[L0_B]] -; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-INTERLEAVED-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 -; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]] ; CHECK-INTERLEAVED: [[FOR_END]]: -; CHECK-INTERLEAVED-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] -; CHECK-INTERLEAVED-NEXT: ret i32 [[X_0_LCSSA]] +; CHECK-INTERLEAVED-NEXT: ret i32 [[BIN_RDX]] ; entry: br label %for.body @@ -3351,6 +2677,129 @@ for.end: ; preds = %for.body, %entry ret i32 %x.0.lcssa } +; Test that bundling recipes that share an operand into an expression works. +; In this case the two extends are the recipes that share an operand. +define i64 @reduction_expression_same_operands(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { +; CHECK-LABEL: define i64 @reduction_expression_same_operands( +; CHECK-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i64> [[TMP3]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]]) +; CHECK-NEXT: [[TMP6]] = add i64 [[VEC_PHI]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_NEXT:%.*]], %[[LOOP]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[IV]] +; CHECK-NEXT: [[LOAD0:%.*]] = load i16, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CONV0:%.*]] = sext i16 [[LOAD0]] to i32 +; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[LOAD0]] to i32 +; CHECK-NEXT: [[MUL1:%.*]] = mul nsw i32 [[CONV0]], [[CONV1]] +; CHECK-NEXT: [[MUL:%.*]] = sext i32 [[MUL1]] to i64 +; CHECK-NEXT: [[RDX_NEXT]] = add nsw i64 [[RDX]], [[MUL]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ [[RDX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[R_0_LCSSA]] +; +; CHECK-INTERLEAVED-LABEL: define i64 @reduction_expression_same_operands( +; CHECK-INTERLEAVED-SAME: ptr readonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], i32 [[N:%.*]]) { +; CHECK-INTERLEAVED-NEXT: [[ENTRY:.*]]: +; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 +; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-INTERLEAVED: [[VECTOR_PH]]: +; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 8 +; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-INTERLEAVED-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-INTERLEAVED: [[VECTOR_BODY]]: +; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[INDEX]] +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 4 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP2]], align 4 +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = mul nsw <4 x i64> [[TMP4]], [[TMP4]] +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP5]]) +; CHECK-INTERLEAVED-NEXT: [[TMP7]] = add i64 [[VEC_PHI]], [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD2]] to <4 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nsw <4 x i64> [[TMP9]], [[TMP9]] +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP10]]) +; CHECK-INTERLEAVED-NEXT: [[TMP12]] = add i64 [[VEC_PHI1]], [[TMP11]] +; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-INTERLEAVED: [[MIDDLE_BLOCK]]: +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i64 [[TMP12]], [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-INTERLEAVED: [[SCALAR_PH]]: +; CHECK-INTERLEAVED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INTERLEAVED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-INTERLEAVED-NEXT: br label %[[LOOP:.*]] +; CHECK-INTERLEAVED: [[LOOP]]: +; CHECK-INTERLEAVED-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_NEXT:%.*]], %[[LOOP]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-INTERLEAVED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[IV]] +; CHECK-INTERLEAVED-NEXT: [[LOAD0:%.*]] = load i16, ptr [[ARRAYIDX]], align 4 +; CHECK-INTERLEAVED-NEXT: [[CONV0:%.*]] = sext i16 [[LOAD0]] to i32 +; CHECK-INTERLEAVED-NEXT: [[CONV1:%.*]] = sext i16 [[LOAD0]] to i32 +; CHECK-INTERLEAVED-NEXT: [[MUL1:%.*]] = mul nsw i32 [[CONV0]], [[CONV1]] +; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = sext i32 [[MUL1]] to i64 +; CHECK-INTERLEAVED-NEXT: [[RDX_NEXT]] = add nsw i64 [[RDX]], [[MUL]] +; CHECK-INTERLEAVED-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-INTERLEAVED-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-INTERLEAVED-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK-INTERLEAVED: [[EXIT]]: +; CHECK-INTERLEAVED-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ [[RDX_NEXT]], %[[LOOP]] ], [ [[BIN_RDX]], %[[MIDDLE_BLOCK]] ] +; CHECK-INTERLEAVED-NEXT: ret i64 [[R_0_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %rdx = phi i64 [ %rdx.next, %loop ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, ptr %x, i32 %iv + %load0 = load i16, ptr %arrayidx, align 4 + %conv0 = sext i16 %load0 to i32 + %conv1 = sext i16 %load0 to i32 + %mul = mul nsw i32 %conv0, %conv1 + %conv = sext i32 %mul to i64 + %rdx.next = add nsw i64 %rdx, %conv + %iv.next = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %exit, label %loop + +exit: + %r.0.lcssa = phi i64 [ %rdx.next, %loop ] + ret i64 %r.0.lcssa +} + declare float @llvm.fmuladd.f32(float, float, float) !6 = distinct !{!6, !7, !8} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll index 7d35ad0..855a0ce 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -60,11 +60,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]]) ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] @@ -162,11 +158,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]]) ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] @@ -267,11 +259,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[DOT_CRIT_EDGE:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] -; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP42]]) ; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] @@ -371,11 +359,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP42]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -475,11 +459,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP42]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -579,11 +559,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP42]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -683,11 +659,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP42]]) ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] @@ -787,11 +759,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP42]]) ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] @@ -874,11 +842,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP25]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] @@ -959,11 +923,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP25]]) ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll index 916a83a..65d5701 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction.ll @@ -775,21 +775,7 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] -; CHECK: if.then: -; CHECK-NEXT: br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then8: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else: -; CHECK-NEXT: br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]] -; CHECK: if.then16: -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.end: ; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]]) ; CHECK-NEXT: ret float [[SUM_1_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/remarks-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/remarks-reduction-inloop.ll index e6ad593..e621b80 100644 --- a/llvm/test/Transforms/LoopVectorize/remarks-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/remarks-reduction-inloop.ll @@ -24,20 +24,8 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[BODY:.*]] -; CHECK: [[BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_TMP:%.*]] = phi i32 [ [[SUM:%.*]], %[[BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[LOAD0:%.*]] = load i32, ptr [[GEP0]], align 4 -; CHECK-NEXT: [[SUM]] = add i32 [[SUM_TMP]], [[LOAD0]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[BODY]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM]], %[[BODY]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP4]] ; entry: br label %body diff --git a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll index 826696f..0896848 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll @@ -25,22 +25,8 @@ define i32 @preserve_inbounds(i64 %start, ptr %ptr) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[REV_IND:%.*]] = phi i64 [ [[START]], %[[SCALAR_PH]] ], [ [[REV_IND_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[REDUX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[REDUX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[REV_IND_NEXT]] = add i64 [[REV_IND]], -1 -; CHECK-NEXT: [[GEP_PTR_IND:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[REV_IND_NEXT]] -; CHECK-NEXT: [[LD_PTR:%.*]] = load i32, ptr [[GEP_PTR_IND]], align 4 -; CHECK-NEXT: [[REDUX_NEXT]] = add i32 [[LD_PTR]], [[REDUX]] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[END]] ; CHECK: [[END]]: -; CHECK-NEXT: [[REDUX_NEXT_LCSSA:%.*]] = phi i32 [ [[REDUX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[REDUX_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: br label %loop @@ -85,22 +71,8 @@ define i32 @preserve_nusw(i64 %start, ptr %ptr) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[REV_IND:%.*]] = phi i64 [ [[START]], %[[SCALAR_PH]] ], [ [[REV_IND_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[REDUX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[REDUX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[REV_IND_NEXT]] = add i64 [[REV_IND]], -1 -; CHECK-NEXT: [[GEP_PTR_IND:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[REV_IND_NEXT]] -; CHECK-NEXT: [[LD_PTR:%.*]] = load i32, ptr [[GEP_PTR_IND]], align 4 -; CHECK-NEXT: [[REDUX_NEXT]] = add i32 [[LD_PTR]], [[REDUX]] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[END]] ; CHECK: [[END]]: -; CHECK-NEXT: [[REDUX_NEXT_LCSSA:%.*]] = phi i32 [ [[REDUX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[REDUX_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: br label %loop @@ -145,22 +117,8 @@ define i32 @drop_nuw(i64 %start, ptr %ptr) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[REV_IND:%.*]] = phi i64 [ [[START]], %[[SCALAR_PH]] ], [ [[REV_IND_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[REDUX:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[REDUX_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[REV_IND_NEXT]] = add i64 [[REV_IND]], -1 -; CHECK-NEXT: [[GEP_PTR_IND:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[REV_IND_NEXT]] -; CHECK-NEXT: [[LD_PTR:%.*]] = load i32, ptr [[GEP_PTR_IND]], align 4 -; CHECK-NEXT: [[REDUX_NEXT]] = add i32 [[LD_PTR]], [[REDUX]] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ne i32 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[END]] ; CHECK: [[END]]: -; CHECK-NEXT: [[REDUX_NEXT_LCSSA:%.*]] = phi i32 [ [[REDUX_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[REDUX_NEXT_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP6]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index 5790921..31129d3 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -37,22 +37,8 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[LOOPEND:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[ADD_I7:%.*]] = phi i64 [ [[STARTVAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ADD_I]] = add i64 [[ADD_I7]], -1 -; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD_I]] -; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4 -; CHECK-NEXT: [[INC_REDUX]] = add i32 [[TMP_I1]], [[REDUX5]] -; CHECK-NEXT: [[INC4]] = add i32 [[I_06]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC4]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[LOOPEND]] ; CHECK: [[LOOPEND]]: -; CHECK-NEXT: [[INC_REDUX_LCSSA:%.*]] = phi i32 [ [[INC_REDUX]], %[[FOR_BODY]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[INC_REDUX_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP13]] ; entry: br label %for.body @@ -105,22 +91,8 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; CHECK-NEXT: br label %[[LOOPEND:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[ADD_I7:%.*]] = phi i128 [ [[STARTVAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[REDUX5:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[ADD_I]] = add i128 [[ADD_I7]], -1 -; CHECK-NEXT: [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[ADD_I]] -; CHECK-NEXT: [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4 -; CHECK-NEXT: [[INC_REDUX]] = add i32 [[TMP_I1]], [[REDUX5]] -; CHECK-NEXT: [[INC4]] = add i32 [[I_06]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC4]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[LOOPEND]] ; CHECK: [[LOOPEND]]: -; CHECK-NEXT: [[INC_REDUX_LCSSA:%.*]] = phi i32 [ [[INC_REDUX]], %[[FOR_BODY]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[INC_REDUX_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP13]] ; entry: br label %for.body @@ -263,19 +235,6 @@ define void @reverse_forward_induction_i64_i8() { ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[WHILE_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[WHILE_BODY:.*]] -; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ 0, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[INC]] = add i8 [[FORWARD_INDUCTION_05]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[INC]] to i32 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 0 -; CHECK-NEXT: br i1 [[CMP]], label %[[WHILE_BODY]], label %[[WHILE_END]] ; CHECK: [[WHILE_END]]: ; CHECK-NEXT: ret void ; @@ -329,19 +288,6 @@ define void @reverse_forward_induction_i64_i8_signed() { ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[WHILE_END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[WHILE_BODY:.*]] -; CHECK: [[WHILE_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ -127, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ] -; CHECK-NEXT: [[INC]] = add i8 [[FORWARD_INDUCTION_05]], 1 -; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[INC]] to i32 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 0 -; CHECK-NEXT: br i1 [[CMP]], label %[[WHILE_BODY]], label %[[WHILE_END]] ; CHECK: [[WHILE_END]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index 79fdc07..f87be5a 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -429,13 +429,9 @@ define dso_local void @forced_optsize(ptr noalias nocapture readonly %x_p, ptr n ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; ; FORCED_OPTSIZE-LABEL: @forced_optsize( ; FORCED_OPTSIZE-NEXT: entry: diff --git a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll index a43ea07d..c7b2704 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll @@ -19,60 +19,49 @@ define void @test_pr63368(i1 %c, ptr %A) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP0]], [[VECTOR_BODY]] ] ; CHECK-NEXT: br label [[EXIT_1:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_1_HEADER:%.*]] -; CHECK: loop.1.header: -; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_1_NEXT:%.*]], [[LOOP_1_LATCH:%.*]] ] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: br i1 [[C]], label [[LOOP_1_LATCH]], label [[LOOP_1_LATCH]] -; CHECK: loop.1.latch: -; CHECK-NEXT: [[L_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_1_HEADER]] ], [ [[L]], [[LOOP_1_HEADER]] ] -; CHECK-NEXT: [[IV_1_NEXT]] = add nuw nsw i32 [[IV_1]], 1 -; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i32 [[IV_1_NEXT]], 100 -; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT_1]], label [[LOOP_1_HEADER]] ; CHECK: exit.1: -; CHECK-NEXT: [[L_LCSSA_LCSSA:%.*]] = phi i32 [ [[L_LCSSA]], [[LOOP_1_LATCH]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[L_LCSSA_LCSSA]], i32 -1) +; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[DOTLCSSA]], i32 -1) ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX1]], 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH2:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: -; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[L_LCSSA_LCSSA]], i32 -1) +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 poison, i32 -1) ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SMAX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[TMP5:%.*]] = add i8 1, [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i8 [[TMP5]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], 255 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] -; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH2]], label [[VECTOR_PH3:%.*]] -; CHECK: vector.ph3: +; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH2:%.*]] +; CHECK: vector.ph2: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[N_VEC]] to i8 -; CHECK-NEXT: br label [[VECTOR_BODY4:%.*]] -; CHECK: vector.body4: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i32 [ 0, [[VECTOR_PH3]] ], [ [[INDEX_NEXT6:%.*]], [[VECTOR_BODY4]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX5]] to i8 +; CHECK-NEXT: br label [[VECTOR_BODY3:%.*]] +; CHECK: vector.body3: +; CHECK-NEXT: [[INDEX4:%.*]] = phi i32 [ 0, [[VECTOR_PH2]] ], [ [[INDEX_NEXT5:%.*]], [[VECTOR_BODY3]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX4]] to i8 ; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[A]], i8 [[TMP10]] ; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP11]], align 1 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX5]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY4]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: middle.block7: +; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX4]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT5]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK6:%.*]], label [[VECTOR_BODY3]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: middle.block6: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_2:%.*]], label [[SCALAR_PH2]] -; CHECK: scalar.ph2: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP9]], [[MIDDLE_BLOCK7]] ], [ 0, [[EXIT_1]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_2:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP9]], [[MIDDLE_BLOCK6]] ], [ 0, [[EXIT_1]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: -; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH2]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ] ; CHECK-NEXT: [[IV_2_NEXT]] = add i8 [[IV_2]], 1 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i8 [[IV_2_NEXT]] ; CHECK-NEXT: store i8 0, ptr [[GEP_A]], align 1 ; CHECK-NEXT: [[IV_2_SEXT:%.*]] = sext i8 [[IV_2]] to i32 -; CHECK-NEXT: [[EC_2:%.*]] = icmp sge i32 [[L_LCSSA_LCSSA]], [[IV_2_SEXT]] +; CHECK-NEXT: [[EC_2:%.*]] = icmp sge i32 [[DOTLCSSA]], [[IV_2_SEXT]] ; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP_2]], label [[EXIT_2]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: exit.2: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll index d620b92..92af828 100644 --- a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll @@ -20,21 +20,6 @@ define void @neg_cond(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_GEP:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P_GEP]], align 4 -; CHECK-NEXT: [[Q_GEP:%.*]] = getelementptr i32, ptr [[Q]], i32 [[IV]] -; CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[Q_GEP]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 42 -; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[CMP]], true -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT]], i32 42, i32 43 -; CHECK-NEXT: store i32 [[SEL]], ptr [[P_GEP]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i32 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll index b87cf90..f4d5a84 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll @@ -25,21 +25,8 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) { ; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i64 [[A]], i64 undef ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[SCALAR_PH:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ undef, [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 1 -; CHECK-NEXT: [[SELECT]] = select i1 [[C]], i64 [[RED]], i64 [[A]] -; CHECK-NEXT: [[ADD]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 32 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[PHI]] +; CHECK-NEXT: ret i64 [[RDX_SELECT]] ; entry: br label %loop @@ -83,21 +70,8 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) { ; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i64 [[A]], i64 poison ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[SCALAR_PH:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ poison, [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 1 -; CHECK-NEXT: [[SELECT]] = select i1 [[C]], i64 [[RED]], i64 [[A]] -; CHECK-NEXT: [[ADD]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 32 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[PHI]] +; CHECK-NEXT: ret i64 [[RDX_SELECT]] ; entry: br label %loop @@ -141,21 +115,8 @@ define i64 @pr62565_incoming_value_may_be_poison(i64 %a, ptr %src, i64 %start) { ; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i64 [[A]], i64 [[START]] ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[SCALAR_PH:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[START]], [[SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 1 -; CHECK-NEXT: [[SELECT]] = select i1 [[C]], i64 [[RED]], i64 [[A]] -; CHECK-NEXT: [[ADD]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 32 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[PHI]] +; CHECK-NEXT: ret i64 [[RDX_SELECT]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index 0fd780e..1f5646d 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -36,22 +36,11 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]]) -; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[NEXT:%.*]] = phi i32 [ [[SEL:%.*]], [[LOOP]] ], [ 0, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[EXTRA_ITER]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SEL_COND:%.*]] = icmp sgt i32 [[NEXT]], 10 -; CHECK-NEXT: [[SEL]] = select i1 [[SEL_COND]], i32 [[NEXT]], i32 10 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]] ; CHECK: exit.loopexit: -; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], [[LOOP]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ 0, [[CHECK]] ], [ [[SEL_LCSSA]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ 0, [[CHECK]] ], [ [[TMP5]], [[LOOP]] ] ; CHECK-NEXT: ret i32 [[RESULT]] ; entry: @@ -90,19 +79,9 @@ define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_PHI]]) -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 12, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], -1 -; CHECK-NEXT: [[RED_NEXT]] = mul i32 [[RED]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 0 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[RED_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[RED_LCSSA]] +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll index edad0b5..794e274 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll @@ -40,20 +40,8 @@ define noundef i32 @f(i32 noundef %g) { ; VF4IC2-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32 ; VF4IC2-NEXT: [[TMP16:%.*]] = add i32 0, [[TMP15]] ; VF4IC2-NEXT: br label %[[RETURN]] -; VF4IC2: [[SCALAR_PH:.*]]: -; VF4IC2-NEXT: br label %[[LOOP_HEADER:.*]] -; VF4IC2: [[LOOP_HEADER]]: -; VF4IC2-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; VF4IC2-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IV]], 3 -; VF4IC2-NEXT: [[SHR:%.*]] = ashr i32 [[G]], [[MUL]] -; VF4IC2-NEXT: [[EARLY_COND:%.*]] = icmp eq i32 [[SHR]], 0 -; VF4IC2-NEXT: br i1 [[EARLY_COND]], label %[[LOOP_LATCH]], label %[[RETURN]] -; VF4IC2: [[LOOP_LATCH]]: -; VF4IC2-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; VF4IC2-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 8 -; VF4IC2-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]] ; VF4IC2: [[RETURN]]: -; VF4IC2-NEXT: [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT]] ] +; VF4IC2-NEXT: [[RES:%.*]] = phi i32 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT]] ] ; VF4IC2-NEXT: ret i32 [[RES]] ; ; VF8IC1-LABEL: define noundef i32 @f( @@ -80,20 +68,8 @@ define noundef i32 @f(i32 noundef %g) { ; VF8IC1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 ; VF8IC1-NEXT: [[TMP7:%.*]] = add i32 0, [[TMP6]] ; VF8IC1-NEXT: br label %[[RETURN]] -; VF8IC1: [[SCALAR_PH:.*]]: -; VF8IC1-NEXT: br label %[[LOOP_HEADER:.*]] -; VF8IC1: [[LOOP_HEADER]]: -; VF8IC1-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; VF8IC1-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IV]], 3 -; VF8IC1-NEXT: [[SHR:%.*]] = ashr i32 [[G]], [[MUL]] -; VF8IC1-NEXT: [[EARLY_COND:%.*]] = icmp eq i32 [[SHR]], 0 -; VF8IC1-NEXT: br i1 [[EARLY_COND]], label %[[LOOP_LATCH]], label %[[RETURN]] -; VF8IC1: [[LOOP_LATCH]]: -; VF8IC1-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; VF8IC1-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 8 -; VF8IC1-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]] ; VF8IC1: [[RETURN]]: -; VF8IC1-NEXT: [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VECTOR_EARLY_EXIT]] ] +; VF8IC1-NEXT: [[RES:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VECTOR_EARLY_EXIT]] ] ; VF8IC1-NEXT: ret i32 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll index b1b3a3f..644900d 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll @@ -9,9 +9,9 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]] @@ -22,7 +22,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_SPLIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_SPLIT]]: ; CHECK-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -31,22 +31,8 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[TMP8]] ; CHECK-NEXT: br label %[[LOOP_END]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP1:.*]] -; CHECK: [[LOOP1]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END]] -; CHECK: [[LOOP_INC]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP1]], label %[[LOOP_END]] ; CHECK: [[LOOP_END]]: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], %[[LOOP1]] ], [ -1, %[[LOOP_INC]] ], [ -1, %[[MIDDLE_BLOCK]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -331,9 +317,9 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_p ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IV_NEXT1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] -; CHECK-NEXT: br label %[[LOOP_HEADER1:.*]] -; CHECK: [[LOOP_HEADER1]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_HEADER1]] ] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2 @@ -343,10 +329,10 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_p ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] -; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_SPLIT:.*]], label %[[LOOP_HEADER1]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_SPLIT]]: -; CHECK-NEXT: br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[LOOP_LATCH1:.*]] -; CHECK: [[LOOP_LATCH1]]: +; CHECK-NEXT: br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] +; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[VECTOR_EARLY_EXIT]]: @@ -356,10 +342,10 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_p ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]] ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[IV1:%.*]] = phi ptr [ [[IV_NEXT1]], %[[LOOP_LATCH1]] ], [ [[A]], %[[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IV_NEXT1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[LOOP_HEADER_PREHEADER]] ] ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[IV1]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV]], align 2 ; CHECK-NEXT: [[C_0:%.*]] = icmp eq i16 [[L]], 0 ; CHECK-NEXT: br i1 [[C_0]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_LATCH]] @@ -368,7 +354,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_n_not_zero_i16_p ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[A_END]] ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[EXIT_LOOPEXIT]]: -; CHECK-NEXT: [[P_PH:%.*]] = phi ptr [ [[A_END]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[A_END]], %[[LOOP_LATCH1]] ], [ [[TMP13]], %[[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[P_PH:%.*]] = phi ptr [ [[A_END]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[A_END]], %[[MIDDLE_BLOCK]] ], [ [[TMP13]], %[[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[P:%.*]] = phi ptr [ [[A]], %[[ENTRY]] ], [ [[P_PH]], %[[EXIT_LOOPEXIT]] ] diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll index b630557..d8e62c7 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll @@ -43,24 +43,10 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4-NEXT: br label %[[E2:.*]] ; VF4IC4: [[VECTOR_EARLY_EXIT]]: ; VF4IC4-NEXT: br label %[[E1:.*]] -; VF4IC4: [[SCALAR_PH:.*]]: -; VF4IC4-NEXT: br label %[[LOOP_HEADER:.*]] -; VF4IC4: [[LOOP_HEADER]]: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[SCALAR_PH]] ] -; VF4IC4-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] -; VF4IC4-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 -; VF4IC4-NEXT: [[C_1:%.*]] = icmp eq i32 [[L]], 10 -; VF4IC4-NEXT: br i1 [[C_1]], label %[[E1]], label %[[LOOP_LATCH]] -; VF4IC4: [[LOOP_LATCH]]: -; VF4IC4-NEXT: [[INC]] = add nuw i64 [[IV]], 1 -; VF4IC4-NEXT: [[C_2:%.*]] = icmp eq i64 [[INC]], 128 -; VF4IC4-NEXT: br i1 [[C_2]], label %[[E2]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4IC4: [[E1]]: -; VF4IC4-NEXT: [[P1:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] -; VF4IC4-NEXT: ret i64 [[P1]] +; VF4IC4-NEXT: ret i64 0 ; VF4IC4: [[E2]]: -; VF4IC4-NEXT: [[P2:%.*]] = phi i64 [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ] -; VF4IC4-NEXT: ret i64 [[P2]] +; VF4IC4-NEXT: ret i64 1 ; entry: %src = alloca [128 x i32] @@ -94,6 +80,4 @@ e2: ; VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]} -; VF4IC4: [[META4]] = !{!"llvm.loop.interleave.count", i32 4} ;. diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-only.ll index 6836f7b..a50ce96 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-only.ll @@ -46,21 +46,9 @@ define i8 @iv_used_in_exit_with_math(i8 noundef %g) { ; CHECK-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8 ; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP19]] to i8 ; CHECK-NEXT: br label %[[RETURN]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[S:%.*]] = shl nuw i8 1, [[IV]] -; CHECK-NEXT: [[A:%.*]] = and i8 [[S]], [[G]] -; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[RETURN]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], 4 -; CHECK-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[RES_IV1:%.*]] = phi i8 [ 32, %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ 32, %[[MIDDLE_BLOCK]] ], [ [[TMP20]], %[[VECTOR_EARLY_EXIT]] ] -; CHECK-NEXT: [[RES_IV2:%.*]] = phi i8 [ 0, %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ [[TMP23]], %[[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RES_IV1:%.*]] = phi i8 [ 32, %[[MIDDLE_BLOCK]] ], [ [[TMP20]], %[[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RES_IV2:%.*]] = phi i8 [ 0, %[[MIDDLE_BLOCK]] ], [ [[TMP23]], %[[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: [[RES:%.*]] = add i8 [[RES_IV1]], [[RES_IV2]] ; CHECK-NEXT: ret i8 [[RES]] ; @@ -125,21 +113,9 @@ define i32 @iv_used_in_exit_with_loads(ptr align 4 dereferenceable(128) %src) { ; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 ; CHECK-NEXT: [[TMP29:%.*]] = add i32 [[INDEX]], [[TMP28]] ; CHECK-NEXT: br label %[[RETURN]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[LOOP_LATCH]], label %[[RETURN]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 32 -; CHECK-NEXT: br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]] ; CHECK: [[RETURN]]: -; CHECK-NEXT: [[RES_IV1:%.*]] = phi i32 [ 32, %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ 32, %[[MIDDLE_BLOCK]] ], [ [[TMP29]], %[[VECTOR_EARLY_EXIT]] ] -; CHECK-NEXT: [[RES_IV2:%.*]] = phi i32 [ 0, %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ 0, %[[MIDDLE_BLOCK]] ], [ [[TMP29]], %[[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RES_IV1:%.*]] = phi i32 [ 32, %[[MIDDLE_BLOCK]] ], [ [[TMP29]], %[[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RES_IV2:%.*]] = phi i32 [ 0, %[[MIDDLE_BLOCK]] ], [ [[TMP29]], %[[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: [[RES:%.*]] = add i32 [[RES_IV1]], [[RES_IV2]] ; CHECK-NEXT: ret i32 [[RES]] ; diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll index a4ce68f..ed5dcc7 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll @@ -42,25 +42,11 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() { ; VF4IC4: middle.block: ; VF4IC4-NEXT: br label [[E2:%.*]] ; VF4IC4: vector.early.exit: -; VF4IC4-NEXT: br label [[E1:%.*]] -; VF4IC4: scalar.ph: ; VF4IC4-NEXT: br label [[LOOP_HEADER:%.*]] -; VF4IC4: loop.header: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[SCALAR_PH:%.*]] ] -; VF4IC4-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]] -; VF4IC4-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 -; VF4IC4-NEXT: [[C_1:%.*]] = icmp eq i32 [[L]], 10 -; VF4IC4-NEXT: br i1 [[C_1]], label [[E1]], label [[LOOP_LATCH]] -; VF4IC4: loop.latch: -; VF4IC4-NEXT: [[INC]] = add nuw i64 [[IV]], 1 -; VF4IC4-NEXT: [[C_2:%.*]] = icmp eq i64 [[INC]], 128 -; VF4IC4-NEXT: br i1 [[C_2]], label [[E2]], label [[LOOP_HEADER]] ; VF4IC4: e1: -; VF4IC4-NEXT: [[P1:%.*]] = phi i64 [ 0, [[LOOP_HEADER]] ], [ 0, [[VECTOR_EARLY_EXIT]] ] -; VF4IC4-NEXT: ret i64 [[P1]] +; VF4IC4-NEXT: ret i64 0 ; VF4IC4: e2: -; VF4IC4-NEXT: [[P2:%.*]] = phi i64 [ 1, [[LOOP_LATCH]] ], [ 1, [[MIDDLE_BLOCK]] ] -; VF4IC4-NEXT: ret i64 [[P2]] +; VF4IC4-NEXT: ret i64 1 ; entry: %src = alloca [128 x i32] @@ -155,22 +141,8 @@ define i64 @same_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_END]] -; VF4IC4: scalar.ph: -; VF4IC4-NEXT: br label [[LOOP:%.*]] -; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] -; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 -; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] -; VF4IC4-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_P2]], align 1 -; VF4IC4-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; VF4IC4-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; VF4IC4: loop.inc: -; VF4IC4-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; VF4IC4-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 67 -; VF4IC4-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; VF4IC4: loop.end: -; VF4IC4-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] +; VF4IC4-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] ; VF4IC4-NEXT: ret i64 [[RETVAL]] ; entry: @@ -256,19 +228,8 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; VF4IC4-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], [[TMP6]] ; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP7]] ; VF4IC4-NEXT: br label [[LOOP_END]] -; VF4IC4: scalar.ph: -; VF4IC4-NEXT: br label [[LOOP:%.*]] -; VF4IC4: loop: -; VF4IC4-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[P1]], [[SCALAR_PH:%.*]] ] -; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1 -; VF4IC4-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 72 -; VF4IC4-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; VF4IC4: loop.inc: -; VF4IC4-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 -; VF4IC4-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[PTR_NEXT]], [[PTREND]] -; VF4IC4-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; VF4IC4: loop.end: -; VF4IC4-NEXT: [[RETVAL:%.*]] = phi ptr [ [[PTR]], [[LOOP]] ], [ [[PTREND]], [[LOOP_INC]] ], [ [[PTREND]], [[MIDDLE_BLOCK]] ], [ [[TMP8]], [[VECTOR_EARLY_EXIT]] ] +; VF4IC4-NEXT: [[RETVAL:%.*]] = phi ptr [ [[PTREND]], [[MIDDLE_BLOCK]] ], [ [[TMP8]], [[VECTOR_EARLY_EXIT]] ] ; VF4IC4-NEXT: ret ptr [[RETVAL]] ; entry: @@ -360,22 +321,8 @@ define i64 @same_exit_block_post_inc_use() { ; VF4IC4-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] ; VF4IC4-NEXT: br label [[LOOP_END]] -; VF4IC4: scalar.ph: -; VF4IC4-NEXT: br label [[LOOP:%.*]] -; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] -; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 -; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] -; VF4IC4-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_P2]], align 1 -; VF4IC4-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; VF4IC4-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; VF4IC4: loop.inc: -; VF4IC4-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; VF4IC4-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 67 -; VF4IC4-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; VF4IC4: loop.end: -; VF4IC4-NEXT: [[RETVAL:%.*]] = phi i64 [ [[IV]], [[LOOP]] ], [ [[IV_NEXT]], [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] +; VF4IC4-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] ; VF4IC4-NEXT: ret i64 [[RETVAL]] ; entry: @@ -470,27 +417,11 @@ define i64 @diff_exit_block_pre_inc_use1() { ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] ; VF4IC4-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] -; VF4IC4-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] -; VF4IC4: scalar.ph: ; VF4IC4-NEXT: br label [[LOOP:%.*]] -; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] -; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 -; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] -; VF4IC4-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_P2]], align 1 -; VF4IC4-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; VF4IC4-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] -; VF4IC4: loop.inc: -; VF4IC4-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; VF4IC4-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 67 -; VF4IC4-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; VF4IC4: loop.early.exit: -; VF4IC4-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[IV]], [[LOOP]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] -; VF4IC4-NEXT: ret i64 [[RETVAL1]] +; VF4IC4-NEXT: ret i64 [[TMP10]] ; VF4IC4: loop.end: -; VF4IC4-NEXT: [[RETVAL2:%.*]] = phi i64 [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ] -; VF4IC4-NEXT: ret i64 [[RETVAL2]] +; VF4IC4-NEXT: ret i64 67 ; entry: %p1 = alloca [1024 x i8] @@ -588,27 +519,11 @@ define i64 @diff_exit_block_post_inc_use1() { ; VF4IC4-NEXT: [[TMP8:%.*]] = select i1 [[TMP32]], i64 [[TMP31]], i64 [[TMP29]] ; VF4IC4-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]] ; VF4IC4-NEXT: [[TMP10:%.*]] = add i64 3, [[TMP9]] -; VF4IC4-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] -; VF4IC4: scalar.ph: ; VF4IC4-NEXT: br label [[LOOP:%.*]] -; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] -; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 -; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] -; VF4IC4-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_P2]], align 1 -; VF4IC4-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; VF4IC4-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] -; VF4IC4: loop.inc: -; VF4IC4-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; VF4IC4-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 67 -; VF4IC4-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; VF4IC4: loop.early.exit: -; VF4IC4-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[IV]], [[LOOP]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] -; VF4IC4-NEXT: ret i64 [[RETVAL1]] +; VF4IC4-NEXT: ret i64 [[TMP10]] ; VF4IC4: loop.end: -; VF4IC4-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[IV_NEXT]], [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ] -; VF4IC4-NEXT: ret i64 [[RETVAL2]] +; VF4IC4-NEXT: ret i64 67 ; entry: %p1 = alloca [1024 x i8] @@ -847,22 +762,8 @@ define i8 @same_exit_block_use_loaded_value() { ; VF4IC4-NEXT: [[TMP41:%.*]] = icmp uge i64 [[TMP8]], 12 ; VF4IC4-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i8 [[TMP40]], i8 [[TMP38]] ; VF4IC4-NEXT: br label [[LOOP_END]] -; VF4IC4: scalar.ph: -; VF4IC4-NEXT: br label [[LOOP:%.*]] -; VF4IC4: loop: -; VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[SCALAR_PH:%.*]] ] -; VF4IC4-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] -; VF4IC4-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 -; VF4IC4-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] -; VF4IC4-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_P2]], align 1 -; VF4IC4-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; VF4IC4-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; VF4IC4: loop.inc: -; VF4IC4-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; VF4IC4-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; VF4IC4-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]] ; VF4IC4: loop.end: -; VF4IC4-NEXT: [[RETVAL:%.*]] = phi i8 [ [[LD1]], [[LOOP]] ], [ -1, [[LOOP_INC]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP42]], [[VECTOR_EARLY_EXIT]] ] +; VF4IC4-NEXT: [[RETVAL:%.*]] = phi i8 [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP42]], [[VECTOR_EARLY_EXIT]] ] ; VF4IC4-NEXT: ret i8 [[RETVAL]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index 219c66f..3bb39b9 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -29,28 +29,7 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: br label [[LOOP_COND:%.*]] -; CHECK: loop.cond: -; CHECK-NEXT: [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]] -; CHECK-NEXT: [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1 -; CHECK-NEXT: [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.next: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]] -; CHECK-NEXT: store i16 [[RES]], ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -112,29 +91,7 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.cond: -; CHECK-NEXT: [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]] -; CHECK-NEXT: [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1 -; CHECK-NEXT: [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.next: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]] -; CHECK-NEXT: store i16 [[RES]], ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -201,26 +158,7 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: [[IV_TRUNC_2:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_A]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.next: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ], [ [[IV_TRUNC_2]], [[LOOP_NEXT]] ] -; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]] -; CHECK-NEXT: [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1 -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i16 [[LV]], ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -297,29 +235,7 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.cond: -; CHECK-NEXT: [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]] -; CHECK-NEXT: [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1 -; CHECK-NEXT: [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]] -; CHECK-NEXT: br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]] -; CHECK: loop.next: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]] -; CHECK-NEXT: store i16 [[RES]], ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 63 -; CHECK-NEXT: br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -371,20 +287,7 @@ define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[ADD_I:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C_0:%.*]] = icmp ugt i32 [[IV]], [[X:%.*]] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[IV]], [[LOOP_HEADER]] ], [ [[IV]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_PTR:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[P]] -; CHECK-NEXT: store i32 [[P]], ptr [[GEP_PTR]], align 4 -; CHECK-NEXT: [[ADD_I]] = add nsw i32 [[P]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD_I]], 1000 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 04f04a8..3500c5c 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -34,22 +34,8 @@ define i64 @same_exit_block_phi_of_consts() { ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ], [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -108,21 +94,7 @@ define i64 @diff_exit_block_phi_of_consts() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.early.exit: ; CHECK-NEXT: ret i64 0 ; CHECK: loop.end: @@ -292,16 +264,7 @@ define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: br label [[EARLY_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i32 [ -10, [[SCALAR_PH:%.*]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT]] -; CHECK: for.inc: -; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IND_NEXT]], 266 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: early.exit: ; CHECK-NEXT: tail call void @abort() ; CHECK-NEXT: unreachable diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 54408b2..79821b8 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -36,22 +36,8 @@ define i64 @same_exit_block_pre_inc_use1() { ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -116,24 +102,8 @@ define i32 @same_exit_block_pre_inc_use1_iv64_endi32_step2() { ; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[DOTCAST]], 2 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i32 9, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ 9, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[INDEX2_NEXT]] = add i32 [[INDEX2]], 2 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[INDEX2]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i32 [[RETVAL]] ; entry: @@ -197,23 +167,8 @@ define i32 @same_exit_block_pre_inc_use1_iv128_endi32_step2() { ; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[DOTCAST]], 2 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i32 9, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i128 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[INDEX2:%.*]] = phi i32 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ 9, [[SCALAR_PH]] ] -; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC]] ], [ [[P1]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 3 -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i128 [[INDEX]], 1 -; CHECK-NEXT: [[INDEX2_NEXT]] = add i32 [[INDEX2]], 2 -; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i128 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[INDEX2]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i32 [[RETVAL]] ; entry: @@ -277,24 +232,8 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() { ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float 1.000000e+00, [[DOTCAST]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = fadd fast float 9.000000e+00, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[INDEX2:%.*]] = phi float [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ 9.000000e+00, [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[INDEX2_NEXT]] = fadd fast float [[INDEX2]], 1.000000e+00 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi float [ [[INDEX2]], [[LOOP]] ], [ 1.230000e+02, [[LOOP_INC]] ], [ 1.230000e+02, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi float [ 1.230000e+02, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret float [[RETVAL]] ; entry: @@ -360,24 +299,8 @@ define ptr @same_exit_block_pre_inc_use1_iv64_endptr() { ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 5 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP20]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[INDEX2:%.*]] = phi ptr [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[P2]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[INDEX2_NEXT]] = getelementptr i8, ptr [[INDEX2]], i64 5 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi ptr [ [[INDEX2]], [[LOOP]] ], [ [[P1]], [[LOOP_INC]] ], [ [[P1]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi ptr [ [[P1]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret ptr [[RETVAL]] ; entry: @@ -438,19 +361,8 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() { ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[P1]], [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 72 -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[PTR_NEXT]], [[PTREND]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi ptr [ [[PTR]], [[LOOP]] ], [ [[PTREND]], [[LOOP_INC]] ], [ [[PTREND]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi ptr [ [[PTREND]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret ptr [[RETVAL]] ; entry: @@ -512,23 +424,8 @@ define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) { ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: [[CMP4:%.*]] = select i1 [[COND]], i1 [[CMP3]], i1 false -; CHECK-NEXT: br i1 [[CMP4]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -592,22 +489,8 @@ define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -670,22 +553,8 @@ define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -745,22 +614,8 @@ define i64 @same_exit_block_pre_inc_use2() { ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ], [ 67, [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 66, [[MIDDLE_BLOCK]] ], [ 67, [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -823,22 +678,8 @@ define i64 @same_exit_block_pre_inc_use3() { ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ], [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[INDEX_LCSSA]] ; entry: @@ -902,20 +743,8 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP8]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -976,22 +805,8 @@ define i64 @same_exit_block_post_inc_use() { ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -1051,19 +866,8 @@ define ptr @same_exit_block_post_inc_use1_ivptr() { ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ [[PTR_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[P1]], [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR]], align 1 -; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], 72 -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[PTR_NEXT]], [[PTREND]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi ptr [ [[PTR_NEXT]], [[LOOP]] ], [ [[PTREND]], [[LOOP_INC]] ], [ [[PTREND]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi ptr [ [[PTREND]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret ptr [[RETVAL]] ; entry: @@ -1123,22 +927,8 @@ define i64 @same_exit_block_post_inc_use2() { ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 66, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -1200,27 +990,11 @@ define i64 @diff_exit_block_pre_inc_use1() { ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] -; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] -; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK-NEXT: ret i64 [[EARLY_EXIT_VALUE]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RETVAL2]] +; CHECK-NEXT: ret i64 67 ; entry: %p1 = alloca [1024 x i8] @@ -1282,27 +1056,11 @@ define i64 @diff_exit_block_pre_inc_use2() { ; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP_END:%.*]] ; CHECK: vector.early.exit: -; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ], [ 67, [[VECTOR_EARLY_EXIT]] ] -; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK-NEXT: ret i64 67 ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RETVAL2]] +; CHECK-NEXT: ret i64 66 ; entry: %p1 = alloca [1024 x i8] @@ -1367,27 +1125,11 @@ define i64 @diff_exit_block_pre_inc_use3() { ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX2]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] -; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.early.exit: -; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] -; CHECK-NEXT: ret i64 [[INDEX_LCSSA]] +; CHECK-NEXT: ret i64 [[EARLY_EXIT_VALUE]] ; CHECK: loop.end: -; CHECK-NEXT: [[INDEX_LCSSA1:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[INDEX_LCSSA1]] +; CHECK-NEXT: ret i64 66 ; entry: %p1 = alloca [1024 x i8] @@ -1450,27 +1192,11 @@ define i64 @diff_exit_block_post_inc_use1() { ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 true) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] -; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] -; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK-NEXT: ret i64 [[EARLY_EXIT_VALUE]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RETVAL2]] +; CHECK-NEXT: ret i64 67 ; entry: %p1 = alloca [1024 x i8] @@ -1536,27 +1262,11 @@ define i64 @diff_exit_block_post_inc_use2() { ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 3, [[TMP11]] -; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] -; CHECK: loop.inc: -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[TMP21]], [[VECTOR_EARLY_EXIT]] ] -; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK-NEXT: ret i64 [[TMP21]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RETVAL2]] +; CHECK-NEXT: ret i64 66 ; entry: %p1 = alloca [1024 x i8] @@ -1624,29 +1334,11 @@ define i64 @diff_exit_block_post_inc_use3(i64 %start) { ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 1 ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 [[START]], [[TMP12]] -; CHECK-NEXT: br label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX2_NEXT:%.*]], [[LOOP_INC]] ], [ [[START]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[INDEX2_NEXT]] = add i64 [[INDEX2]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT]] -; CHECK: loop.inc: -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX2_NEXT]], [[LOOP]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] -; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK-NEXT: ret i64 [[EARLY_EXIT_VALUE]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX2]], [[LOOP_INC]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RETVAL2]] +; CHECK-NEXT: ret i64 [[IND_ESCAPE]] ; entry: %p1 = alloca [1024 x i8] @@ -1713,21 +1405,8 @@ define i64 @loop_contains_safe_call() { ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -1788,21 +1467,8 @@ define i64 @loop_contains_safe_div() { ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP9]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -1864,22 +1530,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align( ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[LD2]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: @@ -2071,22 +1723,8 @@ define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = add i64 3, [[TMP10]] ; CHECK-NEXT: br label [[LOOP_END]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ], [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll b/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll index 66300ed..19ab96d 100644 --- a/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/store-reduction-results-in-tail-folded-loop.ll @@ -41,18 +41,7 @@ define void @pr75298_store_reduction_value_in_folded_loop(i64 %iv.start) optsize ; CHECK: middle.block: ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]]) ; CHECK-NEXT: store i32 [[TMP6]], ptr @a, align 4 -; CHECK-NEXT: br label [[EXIT_LOOPEXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_START]], [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr @c, align 4 -; CHECK-NEXT: [[RED_NEXT]] = xor i32 [[RED]], [[L]] -; CHECK-NEXT: store i32 [[RED_NEXT]], ptr @a, align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 7 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT_LOOPEXIT]], label [[LOOP]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll index 7027d85..ca32808 100644 --- a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll @@ -23,19 +23,9 @@ define float @pr70988() { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1022 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RDX_NEXT]] = fadd contract float [[RDX]], 1.000000e+00 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[INDEX_NEXT]], 1021 -; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[RDX_NEXT]], [[LOOP]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[DOTLCSSA]] +; CHECK-NEXT: ret float [[TMP5]] ; ; CHECK-ALM-LABEL: define float @pr70988() { ; CHECK-ALM-NEXT: entry: @@ -56,19 +46,9 @@ define float @pr70988() { ; CHECK-ALM-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1022 ; CHECK-ALM-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ALM: middle.block: -; CHECK-ALM-NEXT: br label [[EXIT:%.*]] -; CHECK-ALM: scalar.ph: ; CHECK-ALM-NEXT: br label [[LOOP:%.*]] -; CHECK-ALM: loop: -; CHECK-ALM-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] -; CHECK-ALM-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], [[LOOP]] ] -; CHECK-ALM-NEXT: [[RDX_NEXT]] = fadd contract float [[RDX]], 1.000000e+00 -; CHECK-ALM-NEXT: [[INDEX_NEXT]] = add nuw nsw i32 [[INDEX]], 1 -; CHECK-ALM-NEXT: [[COND:%.*]] = icmp ult i32 [[INDEX_NEXT]], 1021 -; CHECK-ALM-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT]] ; CHECK-ALM: exit: -; CHECK-ALM-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[RDX_NEXT]], [[LOOP]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-ALM-NEXT: ret float [[DOTLCSSA]] +; CHECK-ALM-NEXT: ret float [[TMP5]] ; entry: br label %loop @@ -123,21 +103,9 @@ define float @pr72720reduction_using_active_lane_mask(ptr %src) { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[NARROW:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[NARROW]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i32 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 -; CHECK-NEXT: [[RDX_NEXT]] = fadd contract float [[RDX]], [[L]] -; CHECK-NEXT: [[EC:%.*]] = icmp ult i32 [[NARROW]], 15 -; CHECK-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[RDX_NEXT]], [[LOOP]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[DOTLCSSA]] +; CHECK-NEXT: ret float [[TMP13]] ; ; CHECK-ALM-LABEL: define float @pr72720reduction_using_active_lane_mask( ; CHECK-ALM-SAME: ptr [[SRC:%.*]]) { @@ -173,21 +141,9 @@ define float @pr72720reduction_using_active_lane_mask(ptr %src) { ; CHECK-ALM-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-ALM-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-ALM: middle.block: -; CHECK-ALM-NEXT: br label [[EXIT:%.*]] -; CHECK-ALM: scalar.ph: ; CHECK-ALM-NEXT: br label [[LOOP:%.*]] -; CHECK-ALM: loop: -; CHECK-ALM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[NARROW:%.*]], [[LOOP]] ] -; CHECK-ALM-NEXT: [[RDX:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], [[LOOP]] ] -; CHECK-ALM-NEXT: [[NARROW]] = add nuw nsw i32 [[IV]], 1 -; CHECK-ALM-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i32 [[IV]] -; CHECK-ALM-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4 -; CHECK-ALM-NEXT: [[RDX_NEXT]] = fadd contract float [[RDX]], [[L]] -; CHECK-ALM-NEXT: [[EC:%.*]] = icmp ult i32 [[NARROW]], 15 -; CHECK-ALM-NEXT: br i1 [[EC]], label [[LOOP]], label [[EXIT]] ; CHECK-ALM: exit: -; CHECK-ALM-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[RDX_NEXT]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] -; CHECK-ALM-NEXT: ret float [[DOTLCSSA]] +; CHECK-ALM-NEXT: ret float [[TMP11]] ; entry: br label %loop @@ -229,19 +185,9 @@ define float @fadd_reduction_with_live_in(float %inc) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1002 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[SUM_NEXT]] = fadd float [[SUM]], [[INC]] -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 1000 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: -; CHECK-NEXT: [[LCSSA:%.*]] = phi float [ [[SUM_NEXT]], [[LOOP]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret float [[LCSSA]] +; CHECK-NEXT: ret float [[TMP5]] ; ; CHECK-ALM-LABEL: define float @fadd_reduction_with_live_in( ; CHECK-ALM-SAME: float [[INC:%.*]]) { @@ -263,19 +209,9 @@ define float @fadd_reduction_with_live_in(float %inc) { ; CHECK-ALM-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1002 ; CHECK-ALM-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ALM: middle.block: -; CHECK-ALM-NEXT: br label [[EXIT:%.*]] -; CHECK-ALM: scalar.ph: ; CHECK-ALM-NEXT: br label [[LOOP:%.*]] -; CHECK-ALM: loop: -; CHECK-ALM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-ALM-NEXT: [[SUM:%.*]] = phi float [ 0.000000e+00, [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[LOOP]] ] -; CHECK-ALM-NEXT: [[SUM_NEXT]] = fadd float [[SUM]], [[INC]] -; CHECK-ALM-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-ALM-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 1000 -; CHECK-ALM-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK-ALM: exit: -; CHECK-ALM-NEXT: [[LCSSA:%.*]] = phi float [ [[SUM_NEXT]], [[LOOP]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] -; CHECK-ALM-NEXT: ret float [[LCSSA]] +; CHECK-ALM-NEXT: ret float [[TMP5]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll index 97f686c..dcab18f 100644 --- a/llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll @@ -22,16 +22,6 @@ define void @test_variable_stride(ptr %dst, i32 %scale) { ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IDX:%.*]] = mul i32 [[IV]], [[SCALE]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i32 [[IDX]] -; CHECK-NEXT: store i32 [[IV]], ptr [[GEP]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll index 87eebb7b..a852b73 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll @@ -54,16 +54,6 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store ptr [[TMP18]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 200 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret i32 0 ; diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll index 4bc4e54..00e04c7 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-optimize-vector-induction-width.ll @@ -34,15 +34,6 @@ define void @canonical_small_tc_i8(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: store i16 1, ptr [[P_IV]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 15 -; CHECK-NEXT: br i1 [[COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; @@ -94,15 +85,6 @@ define void @canonical_upper_limit_i8(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: store i16 1, ptr [[P_IV]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 255 -; CHECK-NEXT: br i1 [[COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; @@ -154,15 +136,6 @@ define void @canonical_lower_limit_i16(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: store i16 1, ptr [[P_IV]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 257 -; CHECK-NEXT: br i1 [[COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; @@ -214,15 +187,6 @@ define void @canonical_upper_limit_i16(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: store i16 1, ptr [[P_IV]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 65535 -; CHECK-NEXT: br i1 [[COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; @@ -274,15 +238,6 @@ define void @canonical_lower_limit_i32(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: store i16 1, ptr [[P_IV]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 65537 -; CHECK-NEXT: br i1 [[COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; @@ -334,15 +289,6 @@ define void @canonical_upper_limit_i32(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: store i16 1, ptr [[P_IV]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 4294967295 -; CHECK-NEXT: br i1 [[COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; @@ -394,15 +340,6 @@ define void @canonical_lower_limit_i64(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: store i16 1, ptr [[P_IV]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 4294967297 -; CHECK-NEXT: br i1 [[COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; @@ -454,15 +391,6 @@ define void @canonical_upper_limit_i64(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IV]] -; CHECK-NEXT: store i16 1, ptr [[P_IV]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], -1 -; CHECK-NEXT: br i1 [[COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; @@ -514,15 +442,6 @@ define void @canonical_lower_limit_i128(ptr nocapture noundef writeonly %p) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[END:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i256 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_IV:%.*]] = getelementptr inbounds i16, ptr [[P]], i256 [[IV]] -; CHECK-NEXT: store i16 1, ptr [[P_IV]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i256 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i256 [[IV_NEXT]], 18446744073709551617 -; CHECK-NEXT: br i1 [[COND]], label %[[END]], label %[[LOOP]] ; CHECK: [[END]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll index 6fd7c70..b6f43aa 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll @@ -55,22 +55,6 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: switch i32 [[TMP0]], label %[[LOOP_LATCH]] [ -; CHECK-NEXT: i32 0, label %[[LOOP_LATCH]] -; CHECK-NEXT: i32 1, label %[[IF_THEN:.*]] -; CHECK-NEXT: ] -; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 4 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index 45c56a0..3bc5da1 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -53,18 +53,9 @@ define void @VF1-VPlanExe(ptr %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 0, ptr [[DST_PTR]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; entry: br label %for.body @@ -132,17 +123,9 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[ADDR:%.*]] = phi ptr [ [[PTR:%.*]], [[FOR_BODY]] ], [ [[PTR1]], [[SCALAR_PH:%.*]] ] -; CHECK-NEXT: store double 0.000000e+00, ptr [[ADDR]], align 8 -; CHECK-NEXT: [[PTR]] = getelementptr inbounds double, ptr [[ADDR]], i64 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[PTR]], [[PTR2]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; entry: %ptr2 = getelementptr inbounds double, ptr %ptr1, i64 15 diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll index 387a02e..8a16293 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll @@ -133,26 +133,7 @@ define void @ext_cmp(ptr %src.1, ptr %src.2, ptr noalias %dst) { ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i16, ptr [[SRC_1]], i64 [[IV]] -; CHECK-NEXT: [[I2:%.*]] = load i16, ptr [[GEP_SRC_1]], align 2 -; CHECK-NEXT: [[I3:%.*]] = sext i16 [[I2]] to i32 -; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i32 0, [[I3]] -; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i8, ptr [[SRC_2]], i64 [[IV]] -; CHECK-NEXT: [[I4:%.*]] = load i8, ptr [[GEP_SRC_2]], align 2 -; CHECK-NEXT: [[I5:%.*]] = zext i8 [[I4]] to i32 -; CHECK-NEXT: [[I6:%.*]] = select i1 [[C_1]], i32 0, i32 [[I5]] -; CHECK-NEXT: [[I7:%.*]] = and i32 [[I6]], 0 -; CHECK-NEXT: [[I8:%.*]] = trunc nuw nsw i32 [[I7]] to i16 -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i16 [[I8]], ptr [[GEP_DST]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll b/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll index 83ecf1a..6e7cdba 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-loads-p16.ll @@ -26,21 +26,7 @@ define void @pr77468(ptr noalias %src, ptr noalias %dst, i1 %x) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i16 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1 -; CHECK-NEXT: [[X_EXT:%.*]] = zext i1 [[X]] to i32 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[X_EXT]], [[L]] -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i16 [[IV]] -; CHECK-NEXT: [[T:%.*]] = trunc i32 [[AND]] to i16 -; CHECK-NEXT: store i16 [[T]], ptr [[GEP_DST]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll b/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll index 2f5f157..2aebb73 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-reductions.ll @@ -18,11 +18,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[AND_LCSSA_OFF0:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP2]]) ; CHECK-NEXT: ret i8 [[AND_LCSSA_OFF0]] @@ -64,11 +60,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[XOR_LCSSA_OFF0:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP2]]) ; CHECK-NEXT: ret i16 [[XOR_LCSSA_OFF0]] @@ -110,11 +102,7 @@ define i16 @reduction_xor_trunc(ptr noalias nocapture %ptr) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[XOR_LCSSA_OFF0:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP2]]) ; CHECK-NEXT: ret i16 [[XOR_LCSSA_OFF0]] diff --git a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll index 4a372b5..498c58d 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll @@ -24,20 +24,7 @@ define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = lshr i32 [[F]], 18 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV_EXT]] -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -81,20 +68,7 @@ define void @test_shl_const_shift_ops(ptr %dst, i32 %f) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = shl i32 [[F]], 18 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV_EXT]] -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -138,20 +112,7 @@ define void @test_ashr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = ashr i32 [[F]], 18 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV_EXT]] -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -195,22 +156,7 @@ define void @test_shl_const_shifted_op(ptr %dst, i32 %f) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV_EXT]] -; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LV]] to i32 -; CHECK-NEXT: [[L:%.*]] = shl i32 19, [[ZEXT]] -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -257,22 +203,7 @@ define void @test_lshr_by_18(ptr %A) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_EXT]] -; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[LV_EXT:%.*]] = zext i8 [[LV]] to i32 -; CHECK-NEXT: [[L:%.*]] = lshr i32 [[LV_EXT]], 18 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -318,22 +249,7 @@ define void @test_lshr_by_4(ptr %A) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV_EXT:%.*]] = zext i8 [[IV]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_EXT]] -; CHECK-NEXT: [[LV:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[LV_EXT:%.*]] = zext i8 [[LV]] to i32 -; CHECK-NEXT: [[L:%.*]] = lshr i32 [[LV_EXT]], 4 -; CHECK-NEXT: [[L_T:%.*]] = trunc i32 [[L]] to i8 -; CHECK-NEXT: store i8 [[L_T]], ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IV_NEXT]] to i32 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[CONV]], 100 -; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll b/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll index d6273e0..b85f274 100644 --- a/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll +++ b/llvm/test/Transforms/LoopVectorize/uitofp-preserve-nneg.ll @@ -22,19 +22,7 @@ define void @uitofp_preserve_nneg(ptr %result, i32 %size, float %y) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[FOR_EXIT:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER4:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[TMP4]] to float -; CHECK-NEXT: [[TMP5:%.*]] = fmul float [[CONV]], [[Y]] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = zext nneg i32 [[TMP4]] to i64 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[RESULT]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP5]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[TMP4]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 256 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_EXIT]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index ccb301f..985a9a2 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -21,21 +21,6 @@ define void @blend_uniform_iv_trunc(i1 %c) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[IV_TRUNC_2:%.*]] = trunc i64 [[IV]] to i16 -; CHECK-NEXT: br i1 [[C]], label %[[LOOP_NEXT:.*]], label %[[LOOP_LATCH]] -; CHECK: [[LOOP_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[BLEND:%.*]] = phi i16 [ poison, %[[LOOP_HEADER]] ], [ [[IV_TRUNC_2]], %[[LOOP_NEXT]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i16 [[BLEND]] -; CHECK-NEXT: store i16 0, ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -79,20 +64,6 @@ define void @blend_uniform_iv(i1 %c) { ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 [[C]], label %[[LOOP_NEXT:.*]], label %[[LOOP_LATCH]] -; CHECK: [[LOOP_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[BLEND:%.*]] = phi i64 [ poison, %[[LOOP_HEADER]] ], [ [[IV]], %[[LOOP_NEXT]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[BLEND]] -; CHECK-NEXT: store i16 0, ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -150,25 +121,6 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: br i1 [[C]], label %[[LOOP_NEXT:.*]], label %[[LOOP_LATCH]] -; CHECK: [[LOOP_NEXT]]: -; CHECK-NEXT: br i1 [[C]], label %[[LOOP_NEXT_2:.*]], label %[[LOOP_NEXT_3:.*]] -; CHECK: [[LOOP_NEXT_2]]: -; CHECK-NEXT: br label %[[LOOP_NEXT_3]] -; CHECK: [[LOOP_NEXT_3]]: -; CHECK-NEXT: [[BLEND_1:%.*]] = phi i64 [ undef, %[[LOOP_NEXT]] ], [ [[IV]], %[[LOOP_NEXT_2]] ] -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[BLEND:%.*]] = phi i64 [ undef, %[[LOOP_HEADER]] ], [ [[BLEND_1]], %[[LOOP_NEXT_3]] ] -; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[BLEND]] -; CHECK-NEXT: store i16 0, ptr [[DST_PTR]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP439:%.*]] = icmp ult i64 [[IV]], 31 -; CHECK-NEXT: br i1 [[CMP439]], label %[[LOOP_HEADER]], label %[[EXIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -275,22 +227,6 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_IV:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_IV]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[L]], 10 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[P_1:%.*]] = phi i32 [ [[L]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[P_2:%.*]] = phi i32 [ [[ADD]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[RES:%.*]] = add i32 [[P_1]], [[P_2]] -; CHECK-NEXT: store i32 [[RES]], ptr [[GEP_IV]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index 2c49fda..571c55c 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -24,7 +24,8 @@ define void @ld_div1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -64,10 +65,11 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -112,10 +114,11 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -167,10 +170,11 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -214,10 +218,11 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -269,10 +274,11 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -324,7 +330,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -379,7 +385,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -426,7 +432,7 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -467,7 +473,7 @@ define void @ld_div1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -516,7 +522,7 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -565,7 +571,7 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -621,7 +627,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -669,7 +675,7 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -725,7 +731,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -781,7 +787,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -837,7 +843,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -885,7 +891,7 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -933,10 +939,11 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll index c7525fb..6cf82fc 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll @@ -24,7 +24,8 @@ define void @ld_and_neg1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -64,10 +65,11 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -112,10 +114,11 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -167,10 +170,11 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -212,10 +216,11 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -267,7 +272,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -322,7 +327,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -371,7 +376,7 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -427,7 +432,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -483,7 +488,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -539,7 +544,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index 27cefa2..9ed2240 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -58,7 +58,8 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -130,10 +131,11 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -203,10 +205,11 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop @@ -247,10 +250,11 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: store <8 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll index cee53b5..2b5d0f3 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll @@ -25,7 +25,8 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr0_step1_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -46,7 +47,8 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -86,10 +88,11 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr1_step1_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -123,10 +126,11 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF4-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -166,10 +170,11 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr2_step1_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -189,10 +194,11 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF4-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -244,10 +250,11 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr0_step2_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -296,10 +303,11 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -343,10 +351,11 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP8]], ptr [[TMP6]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_lshr1_step2_start0_ind1 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -379,10 +388,11 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP14]], ptr [[TMP10]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF4-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -434,7 +444,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -486,7 +496,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -541,7 +551,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; VF2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -593,7 +603,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; VF4-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -643,7 +653,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; VF2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VF2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -681,7 +691,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 -; VF4-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VF4-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -729,7 +739,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store i64 [[TMP9]], ptr [[TMP7]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VF2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -766,7 +776,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: store i64 [[TMP15]], ptr [[TMP11]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -822,7 +832,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -875,7 +885,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -931,7 +941,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF2-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -984,7 +994,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF4-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index 0f8289d..12851d7 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -35,7 +35,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div1_step1_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -76,7 +77,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -121,10 +123,11 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div2_step1_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -162,10 +165,11 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -218,10 +222,11 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div3_step1_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -259,10 +264,11 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -322,10 +328,11 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div1_step2_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -378,10 +385,11 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -441,10 +449,11 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div2_step2_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -497,10 +506,11 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -560,10 +570,11 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[EXIT:%.*]] -; VF2: scalar.ph: +; VF2: exit: +; VF2-NEXT: ret void ; ; VF4-LABEL: define void @ld_div3_step2_start0_ind2 ; VF4-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { @@ -616,10 +627,11 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[EXIT:%.*]] -; VF4: scalar.ph: +; VF4: exit: +; VF4-NEXT: ret void ; entry: br label %loop @@ -679,7 +691,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -735,7 +747,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -798,7 +810,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -854,7 +866,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -917,7 +929,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VF2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -973,7 +985,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VF4-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1030,7 +1042,7 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1072,7 +1084,7 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 -; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1129,7 +1141,7 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1171,7 +1183,7 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 -; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1228,7 +1240,7 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 -; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; VF2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1270,7 +1282,7 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 -; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; VF4-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1334,7 +1346,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1391,7 +1403,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1455,7 +1467,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1512,7 +1524,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1576,7 +1588,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1633,7 +1645,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1697,7 +1709,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1754,7 +1766,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1818,7 +1830,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1875,7 +1887,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: @@ -1939,7 +1951,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6) ; VF2-NEXT: [[VEC_IND_NEXT2]] = add <2 x i64> [[VEC_IND1]], splat (i64 2) ; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; VF2-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF2: middle.block: ; VF2-NEXT: br label [[SCALAR_PH:%.*]] ; VF2: scalar.ph: @@ -1996,7 +2008,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12) ; VF4-NEXT: [[VEC_IND_NEXT2]] = add <4 x i64> [[VEC_IND1]], splat (i64 4) ; VF4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332 -; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; VF4-NEXT: br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; VF4: middle.block: ; VF4-NEXT: br label [[SCALAR_PH:%.*]] ; VF4: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll index 5f83e392..5d07341 100644 --- a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll +++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll @@ -23,26 +23,7 @@ define void @test_not_first_lane_only_constant(ptr %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[IV]] -; CHECK-NEXT: br i1 false, label [[LOOP_LATCH]], label [[ELSE_1:%.*]] -; CHECK: else.1: -; CHECK-NEXT: br i1 false, label [[THEN_2:%.*]], label [[ELSE_2:%.*]] -; CHECK: then.2: -; CHECK-NEXT: br label [[ELSE_2]] -; CHECK: else.2: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[MERGE:%.*]] = phi ptr [ [[B]], [[ELSE_2]] ], [ poison, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[MERGE]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: store i16 [[L]], ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[C_2]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -98,29 +79,7 @@ define void @test_not_first_lane_only_wide_compare(ptr %A, ptr noalias %B, i16 % ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[IV]] -; CHECK-NEXT: [[L_0:%.*]] = load i16, ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[C_0:%.*]] = icmp ult i16 [[L_0]], [[X]] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[ELSE_1:%.*]] -; CHECK: else.1: -; CHECK-NEXT: [[C_1:%.*]] = icmp ult i16 [[L_0]], [[Y]] -; CHECK-NEXT: br i1 [[C_1]], label [[THEN_2:%.*]], label [[ELSE_2:%.*]] -; CHECK: then.2: -; CHECK-NEXT: br label [[ELSE_2]] -; CHECK: else.2: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[MERGE:%.*]] = phi ptr [ [[B]], [[ELSE_2]] ], [ poison, [[LOOP_HEADER]] ] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[MERGE]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: store i16 [[L]], ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[C_2]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -179,29 +138,7 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[IV]] -; CHECK-NEXT: [[L_0:%.*]] = load i16, ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[C_0:%.*]] = icmp ult i16 [[L_0]], [[X]] -; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[ELSE_1:%.*]] -; CHECK: else.1: -; CHECK-NEXT: [[C_1:%.*]] = icmp ult i16 [[L_0]], [[Y]] -; CHECK-NEXT: br i1 [[C_1]], label [[THEN_2:%.*]], label [[ELSE_2:%.*]] -; CHECK: then.2: -; CHECK-NEXT: br label [[ELSE_2]] -; CHECK: else.2: -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[MERGE:%.*]] = phi ptr [ poison, [[LOOP_HEADER]] ], [ [[B]], [[ELSE_2]] ] -; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[MERGE]], align 2 -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 -; CHECK-NEXT: store i16 [[L]], ptr [[GEP_A]], align 2 -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[C_2]], label [[EXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index 462865d..8da1dca 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -31,20 +31,8 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF1-NEXT: br label %[[EXIT:.*]] ; VF8UF1: [[VECTOR_EARLY_EXIT]]: ; VF8UF1-NEXT: br label %[[EXIT]] -; VF8UF1: [[SCALAR_PH:.*]]: -; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]] -; VF8UF1: [[LOOP_HEADER]]: -; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; VF8UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] -; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 -; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 -; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] -; VF8UF1: [[LOOP_LATCH]]: -; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 -; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]] ; VF8UF1: [[EXIT]]: -; VF8UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] +; VF8UF1-NEXT: [[RES:%.*]] = phi i8 [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] ; VF8UF1-NEXT: ret i8 [[RES]] ; ; VF8UF2-LABEL: define i8 @test_early_exit_max_tc_less_than_16( @@ -70,20 +58,8 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2-NEXT: br label %[[EXIT:.*]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: ; VF8UF2-NEXT: br label %[[EXIT]] -; VF8UF2: [[SCALAR_PH:.*]]: -; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]] -; VF8UF2: [[LOOP_HEADER]]: -; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] -; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1 -; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 -; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] -; VF8UF2: [[LOOP_LATCH]]: -; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]] ; VF8UF2: [[EXIT]]: -; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] +; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] ; VF8UF2-NEXT: ret i8 [[RES]] ; ; VF16UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16( @@ -104,20 +80,8 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF16UF1-NEXT: br label %[[EXIT:.*]] ; VF16UF1: [[VECTOR_EARLY_EXIT]]: ; VF16UF1-NEXT: br label %[[EXIT]] -; VF16UF1: [[SCALAR_PH:.*]]: -; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]] -; VF16UF1: [[LOOP_HEADER]]: -; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; VF16UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] -; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 -; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 -; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] -; VF16UF1: [[LOOP_LATCH]]: -; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 -; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]] ; VF16UF1: [[EXIT]]: -; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] +; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] ; VF16UF1-NEXT: ret i8 [[RES]] ; entry: @@ -166,20 +130,8 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true) ; VF8UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] ; VF8UF1-NEXT: br label %[[EXIT]] -; VF8UF1: [[SCALAR_PH:.*]]: -; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]] -; VF8UF1: [[LOOP_HEADER]]: -; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; VF8UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] -; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 -; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 -; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] -; VF8UF1: [[LOOP_LATCH]]: -; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 -; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]] ; VF8UF1: [[EXIT]]: -; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ] +; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ] ; VF8UF1-NEXT: ret i64 [[RES]] ; ; VF8UF2-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside( @@ -212,20 +164,8 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[TMP7]] ; VF8UF2-NEXT: [[TMP12:%.*]] = add i64 0, [[TMP11]] ; VF8UF2-NEXT: br label %[[EXIT]] -; VF8UF2: [[SCALAR_PH:.*]]: -; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]] -; VF8UF2: [[LOOP_HEADER]]: -; VF8UF2-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; VF8UF2-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] -; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 -; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 -; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] -; VF8UF2: [[LOOP_LATCH]]: -; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 -; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]] ; VF8UF2: [[EXIT]]: -; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT]] ] +; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VECTOR_EARLY_EXIT]] ] ; VF8UF2-NEXT: ret i64 [[RES]] ; ; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside( @@ -248,20 +188,8 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true) ; VF16UF1-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]] ; VF16UF1-NEXT: br label %[[EXIT]] -; VF16UF1: [[SCALAR_PH:.*]]: -; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]] -; VF16UF1: [[LOOP_HEADER]]: -; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; VF16UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]] -; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1 -; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0 -; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]] -; VF16UF1: [[LOOP_LATCH]]: -; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 -; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]] ; VF16UF1: [[EXIT]]: -; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP5]], %[[VECTOR_EARLY_EXIT]] ] +; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP5]], %[[VECTOR_EARLY_EXIT]] ] ; VF16UF1-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll index d013584..2317af5 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll @@ -17,18 +17,8 @@ define i64 @remove_loop_region_int_iv_used_outside(ptr %dst) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store ptr null, ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ], [ 15, %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 15 ; entry: br label %loop @@ -60,18 +50,8 @@ define i64 @remove_loop_region_int_iv_inc_used_outside(ptr %dst) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store ptr null, ptr [[GEP]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ], [ 16, %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 16 ; entry: br label %loop @@ -105,19 +85,8 @@ define ptr @remove_loop_region_ptr_iv_used_outside(ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -8 ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[DST]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[INT_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INT_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: store ptr null, ptr [[PTR_IV]], align 8 -; CHECK-NEXT: [[INT_IV_NEXT]] = add i64 [[INT_IV]], 1 -; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 8 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[INT_IV_NEXT]], 16 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV]], %[[LOOP]] ], [ [[IND_ESCAPE]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret ptr [[RES]] +; CHECK-NEXT: ret ptr [[IND_ESCAPE]] ; entry: br label %loop @@ -151,19 +120,8 @@ define ptr @remove_loop_region_ptr_iv_inc_used_outside(ptr %dst) { ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[DST]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[INT_IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INT_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: store ptr null, ptr [[PTR_IV]], align 8 -; CHECK-NEXT: [[INT_IV_NEXT]] = add i64 [[INT_IV]], 1 -; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 8 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[INT_IV_NEXT]], 16 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[LOOP]] ], [ [[TMP0]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret ptr [[RES]] +; CHECK-NEXT: ret ptr [[TMP0]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll index 5f86469..e160a15 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll @@ -176,15 +176,6 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, ; VF8UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF1: [[MIDDLE_BLOCK]]: ; VF8UF1-NEXT: br label %[[EXIT:.*]] -; VF8UF1: [[SCALAR_PH:.*]]: -; VF8UF1-NEXT: br label %[[LOOP:.*]] -; VF8UF1: [[LOOP]]: -; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ 2, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF8UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]] -; VF8UF1-NEXT: store i16 0, ptr [[GEP_DST]], align 2 -; VF8UF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; VF8UF1-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; VF8UF1-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; VF8UF1: [[EXIT]]: ; VF8UF1-NEXT: ret void ; @@ -316,15 +307,6 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, ; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[EXIT:.*]] -; VF8UF2: [[SCALAR_PH:.*]]: -; VF8UF2-NEXT: br label %[[LOOP:.*]] -; VF8UF2: [[LOOP]]: -; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 2, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF8UF2-NEXT: [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]] -; VF8UF2-NEXT: store i16 0, ptr [[GEP_DST]], align 2 -; VF8UF2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; VF8UF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; VF8UF2-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; VF8UF2: [[EXIT]]: ; VF8UF2-NEXT: ret void ; @@ -455,15 +437,6 @@ define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, ; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[EXIT:.*]] -; VF16UF1: [[SCALAR_PH:.*]]: -; VF16UF1-NEXT: br label %[[LOOP:.*]] -; VF16UF1: [[LOOP]]: -; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ 2, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF16UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]] -; VF16UF1-NEXT: store i16 0, ptr [[GEP_DST]], align 2 -; VF16UF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; VF16UF1-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; VF16UF1-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]] ; VF16UF1: [[EXIT]]: ; VF16UF1-NEXT: ret void ; @@ -728,23 +701,14 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF8UF1: [[PRED_STORE_IF13]]: ; VF8UF1-NEXT: [[TMP40:%.*]] = mul i64 7, [[STEP]] ; VF8UF1-NEXT: [[TMP41:%.*]] = add i64 0, [[TMP40]] -; VF8UF1-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]] -; VF8UF1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]] -; VF8UF1-NEXT: store i8 0, ptr [[TMP43]], align 1 +; VF8UF1-NEXT: [[IV_NEXT:%.*]] = add i64 [[TMP41]], [[STEP]] +; VF8UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]] +; VF8UF1-NEXT: store i8 0, ptr [[GEP_DST]], align 1 ; VF8UF1-NEXT: br label %[[PRED_STORE_CONTINUE14]] ; VF8UF1: [[PRED_STORE_CONTINUE14]]: ; VF8UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF1: [[MIDDLE_BLOCK]]: ; VF8UF1-NEXT: br label %[[EXIT:.*]] -; VF8UF1: [[SCALAR_PH:.*]]: -; VF8UF1-NEXT: br label %[[LOOP:.*]] -; VF8UF1: [[LOOP]]: -; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF8UF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[STEP]] -; VF8UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]] -; VF8UF1-NEXT: store i8 0, ptr [[GEP_DST]], align 1 -; VF8UF1-NEXT: [[EC:%.*]] = icmp slt i64 [[IV_NEXT]], 16 -; VF8UF1-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]] ; VF8UF1: [[EXIT]]: ; VF8UF1-NEXT: ret void ; @@ -922,22 +886,13 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF8UF2-NEXT: [[TMP81:%.*]] = mul i64 15, [[STEP]] ; VF8UF2-NEXT: [[TMP82:%.*]] = add i64 0, [[TMP81]] ; VF8UF2-NEXT: [[TMP83:%.*]] = add i64 [[TMP82]], [[STEP]] -; VF8UF2-NEXT: [[TMP84:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP83]] -; VF8UF2-NEXT: store i8 0, ptr [[TMP84]], align 1 +; VF8UF2-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP83]] +; VF8UF2-NEXT: store i8 0, ptr [[GEP_DST]], align 1 ; VF8UF2-NEXT: br label %[[PRED_STORE_CONTINUE30]] ; VF8UF2: [[PRED_STORE_CONTINUE30]]: ; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br label %[[EXIT:.*]] -; VF8UF2: [[SCALAR_PH:.*]]: -; VF8UF2-NEXT: br label %[[LOOP:.*]] -; VF8UF2: [[LOOP]]: -; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF8UF2-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[STEP]] -; VF8UF2-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]] -; VF8UF2-NEXT: store i8 0, ptr [[GEP_DST]], align 1 -; VF8UF2-NEXT: [[EC:%.*]] = icmp slt i64 [[IV_NEXT]], 16 -; VF8UF2-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]] ; VF8UF2: [[EXIT]]: ; VF8UF2-NEXT: ret void ; @@ -1114,22 +1069,13 @@ define void @scev_expand_step(i64 %x, ptr %dst) { ; VF16UF1-NEXT: [[TMP80:%.*]] = mul i64 15, [[STEP]] ; VF16UF1-NEXT: [[TMP81:%.*]] = add i64 0, [[TMP80]] ; VF16UF1-NEXT: [[TMP82:%.*]] = add i64 [[TMP81]], [[STEP]] -; VF16UF1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP82]] -; VF16UF1-NEXT: store i8 0, ptr [[TMP83]], align 1 +; VF16UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP82]] +; VF16UF1-NEXT: store i8 0, ptr [[GEP_DST]], align 1 ; VF16UF1-NEXT: br label %[[PRED_STORE_CONTINUE30]] ; VF16UF1: [[PRED_STORE_CONTINUE30]]: ; VF16UF1-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br label %[[EXIT:.*]] -; VF16UF1: [[SCALAR_PH:.*]]: -; VF16UF1-NEXT: br label %[[LOOP:.*]] -; VF16UF1: [[LOOP]]: -; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; VF16UF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[STEP]] -; VF16UF1-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]] -; VF16UF1-NEXT: store i8 0, ptr [[GEP_DST]], align 1 -; VF16UF1-NEXT: [[EC:%.*]] = icmp slt i64 [[IV_NEXT]], 16 -; VF16UF1-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]] ; VF16UF1: [[EXIT]]: ; VF16UF1-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll index 5a0c69b..06b0448 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll @@ -753,3 +753,50 @@ exit: %r.0.lcssa = phi i64 [ %rdx.next, %loop ] ret i64 %r.0.lcssa } + +define i64 @print_mulacc_duplicate_extends(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { +; CHECK-LABEL: 'print_mulacc_duplicate_extends' +; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF +; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<%n> = original trip-count +; CHECK-EMPTY: +; CHECK: vector.ph: +; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<1> +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: <x1> vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]> +; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_NEXT:%.+]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]> +; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]> +; CHECK-NEXT: WIDEN ir<[[LOAD0:%.+]]> = load vp<[[ADDR0]]> +; CHECK-NEXT: EXPRESSION vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.sub (mul nsw (ir<[[LOAD0]]> sext to i64), (ir<[[LOAD0]]> sext to i64)) +; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %rdx = phi i64 [ %rdx.next, %loop ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, ptr %x, i32 %iv + %load0 = load i16, ptr %arrayidx, align 4 + %conv0 = sext i16 %load0 to i32 + %mul = mul nsw i32 %conv0, %conv0 + %conv = sext i32 %mul to i64 + %rdx.next = sub nsw i64 %rdx, %conv + %iv.next = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, %n + br i1 %exitcond, label %exit, label %loop + +exit: + %r.0.lcssa = phi i64 [ %rdx.next, %loop ] + ret i64 %r.0.lcssa +} diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index 06b7bd8..d08ca8c 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -21,19 +21,6 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: br label [[LOOP_LATCH]] -; CHECK: loop.latch: -; CHECK-NEXT: [[F_0_I:%.*]] = phi ptr [ [[A]], [[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[F_0_I]], i64 1 -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[IV]] -; CHECK-NEXT: store ptr [[GEP]], ptr [[GEP_B]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], -128 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -78,17 +65,6 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[SRC]], align 8 -; CHECK-NEXT: [[GEP_L:%.*]] = getelementptr float, ptr [[L]], i64 [[N]] -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store ptr [[GEP_L]], ptr [[GEP_DST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -131,17 +107,6 @@ define void @wide_gep_multiple_indices_some_invariant(ptr noalias %dst, ptr noal ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: scalar.ph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[SRC]], align 8 -; CHECK-NEXT: [[GEP_L:%.*]] = getelementptr [10 x float], ptr [[L]], i32 [[X]], i64 [[IV]] -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store ptr [[GEP_L]], ptr [[GEP_DST]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll b/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll index 055f2fd..922ebe7 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-intrinsic.ll @@ -20,17 +20,6 @@ define void @powi_only_first_lane_used_of_second_arg(ptr %p, i32 %pow) { ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[P_GEP:%.*]] = getelementptr float, ptr [[P]], i32 [[IV]] -; CHECK-NEXT: [[X:%.*]] = load float, ptr [[P_GEP]], align 4 -; CHECK-NEXT: [[Y:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 [[POW]]) -; CHECK-NEXT: store float [[Y]], ptr [[P_GEP]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: [[DONE:%.*]] = icmp eq i32 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll index 435e6fc..5e9fe8c 100644 --- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll +++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll @@ -34,8 +34,8 @@ define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[BLOCKSIZE]], 15 ; CHECK-NEXT: [[CMP2_NOT15:%.*]] = icmp eq i32 [[AND]], 0 -; CHECK-NEXT: br i1 [[CMP2_NOT15]], label [[WHILE_END5:%.*]], label [[MIDDLE_BLOCK:%.*]] -; CHECK: middle.block: +; CHECK-NEXT: br i1 [[CMP2_NOT15]], label [[WHILE_END5:%.*]], label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = tail call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 [[AND]]) ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = tail call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[PSRC_ADDR_0_LCSSA]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison) ; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_MASKED_LOAD]] to <16 x i32> @@ -44,7 +44,7 @@ define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SUM_0_LCSSA]], [[TMP6]] ; CHECK-NEXT: br label [[WHILE_END5]] ; CHECK: while.end5: -; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA]], [[WHILE_END]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA]], [[WHILE_END]] ], [ [[TMP7]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUM_1_LCSSA]], [[BLOCKSIZE]] ; CHECK-NEXT: [[CONV6:%.*]] = trunc i32 [[DIV]] to i8 ; CHECK-NEXT: store i8 [[CONV6]], ptr [[PRESULT:%.*]], align 1 diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll index d34ac2b..85c8ed2 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll @@ -424,6 +424,174 @@ join: ret ptr %phi } +define void @hoist_captures_same(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_same( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META9:![0-9]+]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +out: + ret void +} + +define void @hoist_captures_different(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_different( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META10:![0-9]+]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"read_provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_overlap(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_overlap( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META10]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address", !"read_provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_subsume1(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_subsume1( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META9]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address_is_null"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +out: + ret void +} + +define void @hoist_captures_subsume2(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_subsume2( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8, !captures [[META11:![0-9]+]] +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"provenance"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"read_provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_full_set(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_full_set( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"provenance"} + br label %out + +out: + ret void +} + +define void @hoist_captures_only_one1(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_only_one1( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +else: + store ptr %x, ptr %y + br label %out + +out: + ret void +} + +define void @hoist_captures_only_one2(i1 %c, ptr %x, ptr %y) { +; CHECK-LABEL: @hoist_captures_only_one2( +; CHECK-NEXT: if: +; CHECK-NEXT: store ptr [[X:%.*]], ptr [[Y:%.*]], align 8 +; CHECK-NEXT: ret void +; +if: + br i1 %c, label %then, label %else + +then: + store ptr %x, ptr %y + br label %out + +else: + store ptr %x, ptr %y, !captures !{!"address"} + br label %out + +out: + ret void +} + !0 = !{ i8 0, i8 1 } !1 = !{ i8 3, i8 5 } !2 = !{} @@ -445,4 +613,7 @@ join: ; CHECK: [[META6]] = !{float 2.500000e+00} ; CHECK: [[META7]] = !{i32 5, i32 6} ; CHECK: [[META8]] = !{i32 4, i32 5} +; CHECK: [[META9]] = !{!"address"} +; CHECK: [[META10]] = !{!"address", !"read_provenance"} +; CHECK: [[META11]] = !{!"provenance"} ;. |