aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/Transforms/LoopUnroll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Transforms/LoopUnroll')
-rw-r--r--llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll (renamed from llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll)0
-rw-r--r--llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll160
-rw-r--r--llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll68
-rw-r--r--llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll4
-rw-r--r--llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll58
-rw-r--r--llvm/test/Transforms/LoopUnroll/runtime-loop.ll9
-rw-r--r--llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll66
7 files changed, 343 insertions, 22 deletions
diff --git a/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll
index 1339afe..1339afe 100644
--- a/llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/peel.ll
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll
new file mode 100644
index 0000000..96b31d8
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll
@@ -0,0 +1,160 @@
+; Test branch weight metadata, estimated trip count metadata, and block
+; frequencies after loop unrolling with an epilogue.
+
+; ------------------------------------------------------------------------------
+; Define substitutions.
+;
+; Check original loop body frequency.
+; DEFINE: %{bf-fc} = opt %s -S -passes='print<block-freq>' 2>&1 | \
+; DEFINE: FileCheck %s -check-prefixes
+;
+; Unroll loops and then check block frequency. The -implicit-check-not options
+; make sure that no additional labels or @f calls show up.
+; DEFINE: %{ur-bf} = opt %s -S -passes='loop-unroll,print<block-freq>' 2>&1
+; DEFINE: %{fc} = FileCheck %s \
+; DEFINE: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \
+; DEFINE: -implicit-check-not='call void @f' -check-prefixes
+
+; ------------------------------------------------------------------------------
+; Check various interesting unroll count values relative to the original loop's
+; estimated trip count of 11 (e.g., minimum and boundary values).
+;
+; RUN: %{bf-fc} ALL,ORIG
+; RUN: %{ur-bf} -unroll-count=2 -unroll-runtime | %{fc} ALL,UR,UR2
+; RUN: %{ur-bf} -unroll-count=4 -unroll-runtime | %{fc} ALL,UR,UR4
+; RUN: %{ur-bf} -unroll-count=10 -unroll-runtime | %{fc} ALL,UR,UR10
+; RUN: %{ur-bf} -unroll-count=11 -unroll-runtime | %{fc} ALL,UR,UR11
+; RUN: %{ur-bf} -unroll-count=12 -unroll-runtime | %{fc} ALL,UR,UR12
+
+; ------------------------------------------------------------------------------
+; Check the iteration frequencies, which, when each is multiplied by the number
+; of original loop bodies that execute within it, should sum to almost exactly
+; the original loop body frequency.
+;
+; ALL-LABEL: block-frequency-info: test
+;
+; ORIG: - [[ENTRY:.*]]:
+; ORIG: - [[DO_BODY:.*]]: float = 11.0,
+; ORIG: - [[DO_END:.*]]:
+;
+; UR: - [[ENTRY:.*]]:
+; UR: - [[ENTRY_NEW:.*]]:
+; UR2: - [[DO_BODY:.*]]: float = 5.2381,
+; UR4: - [[DO_BODY:.*]]: float = 2.3702,
+; UR10: - [[DO_BODY:.*]]: float = 0.6902,
+; UR11: - [[DO_BODY:.*]]: float = 0.59359,
+; UR12: - [[DO_BODY:.*]]: float = 0.5144,
+; UR: - [[DO_END_UNR_LCSSA:.*]]:
+; UR: - [[DO_BODY_EPIL_PREHEADER:.*]]:
+; UR2: - [[DO_BODY_EPIL:.*]]: float = 0.52381,
+; UR4: - [[DO_BODY_EPIL:.*]]: float = 1.5193,
+; UR10: - [[DO_BODY_EPIL:.*]]: float = 4.098,
+; UR11: - [[DO_BODY_EPIL:.*]]: float = 4.4705,
+; UR12: - [[DO_BODY_EPIL:.*]]: float = 4.8272,
+; UR4: - [[DO_END_EPILOG_LCSSA:.*]]:
+; UR10: - [[DO_END_EPILOG_LCSSA:.*]]:
+; UR11: - [[DO_END_EPILOG_LCSSA:.*]]:
+; UR12: - [[DO_END_EPILOG_LCSSA:.*]]:
+; UR: - [[DO_END:.*]]:
+
+; ------------------------------------------------------------------------------
+; Check the CFGs, including the number of original loop bodies that appear
+; within each unrolled iteration.
+;
+; UR-LABEL: define void @test(i32 %{{.*}}) {
+; UR: [[ENTRY]]:
+; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[ENTRY_NEW]], !prof ![[#PROF_UR_GUARD:]]{{$}}
+; UR: [[ENTRY_NEW]]:
+; UR: br label %[[DO_BODY]]
+; UR: [[DO_BODY]]:
+; UR2-COUNT-2: call void @f
+; UR4-COUNT-4: call void @f
+; UR10-COUNT-10: call void @f
+; UR11-COUNT-11: call void @f
+; UR12-COUNT-12: call void @f
+; UR: br i1 %{{.*}}, label %[[DO_END_UNR_LCSSA]], label %[[DO_BODY]], !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]]{{$}}
+; UR: [[DO_END_UNR_LCSSA]]:
+; UR: br i1 %{{.*}}, label %[[DO_BODY_EPIL_PREHEADER]], label %[[DO_END:.*]], !prof ![[#PROF_RM_GUARD:]]{{$}}
+; UR: [[DO_BODY_EPIL_PREHEADER]]:
+; UR: br label %[[DO_BODY_EPIL]]
+; UR: [[DO_BODY_EPIL]]:
+; UR: call void @f
+; UR4: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}}
+; UR10: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}}
+; UR11: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}}
+; UR12: br i1 %{{.*}}, label %[[DO_BODY_EPIL]], label %[[DO_END_EPILOG_LCSSA]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]{{$}}
+; UR4: [[DO_END_EPILOG_LCSSA]]:
+; UR10: [[DO_END_EPILOG_LCSSA]]:
+; UR11: [[DO_END_EPILOG_LCSSA]]:
+; UR12: [[DO_END_EPILOG_LCSSA]]:
+; UR: br label %[[DO_END]]
+; UR: [[DO_END]]:
+; UR: ret void
+
+declare void @f(i32)
+
+define void @test(i32 %n) {
+entry:
+ br label %do.body
+
+do.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+ %inc = add i32 %i, 1
+ call void @f(i32 %i)
+ %c = icmp sge i32 %inc, %n
+ br i1 %c, label %do.end, label %do.body, !prof !0
+
+do.end:
+ ret void
+}
+
+!0 = !{!"branch_weights", i32 1, i32 10}
+
+; ------------------------------------------------------------------------------
+; Check branch weight metadata and estimated trip count metadata.
+;
+; UR2: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 195225786, i32 1952257862}
+; UR4: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 534047398, i32 1613436250}
+; UR10: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1236740947, i32 910742701}
+; UR11: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1319535738, i32 827947910}
+; UR12: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 1394803730, i32 752679918}
+;
+; UR2: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 372703773, i32 1774779875}
+; UR4: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 680723421, i32 1466760227}
+; UR10: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1319535738, i32 827947910}
+; UR11: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1394803730, i32 752679918}
+; UR12: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 1463229177, i32 684254471}
+;
+; UR2: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR4: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR10: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR11: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR12: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+;
+; UR2: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 5}
+; UR4: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2}
+; UR10: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1}
+; UR11: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1}
+; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0}
+; UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"}
+;
+; UR2: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1022611260, i32 1124872388}
+; UR4: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1531603292, i32 615880356}
+; UR10: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1829762672, i32 317720976}
+; UR11: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1846907894, i32 300575754}
+; UR12: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1860963812, i32 286519836}
+;
+; UR4: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1038564635, i32 1108919013}
+; UR10: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1656332913, i32 491150735}
+; UR11: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1693034047, i32 454449601}
+; UR12: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1723419551, i32 424064097}
+
+; UR4: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+; UR10: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; UR11: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+; UR12: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+;
+; UR4: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3}
+; For UR10, llvm.loop.estimated_trip_count is the same for both loops.
+; UR11: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0}
+; UR12: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 11}
diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll
new file mode 100644
index 0000000..cde9d46
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-partial.ll
@@ -0,0 +1,68 @@
+; Test branch weight metadata, estimated trip count metadata, and block
+; frequencies after partial loop unrolling without -unroll-runtime.
+
+; RUN: opt < %s -S -passes='print<block-freq>' 2>&1 | \
+; RUN: FileCheck -check-prefix=CHECK %s
+
+; The -implicit-check-not options make sure that no additional labels or calls
+; to @f show up.
+; RUN: opt < %s -S -passes='loop-unroll,print<block-freq>' \
+; RUN: -unroll-count=4 2>&1 | \
+; RUN: FileCheck %s -check-prefix=CHECK-UR \
+; RUN: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \
+; RUN: -implicit-check-not='call void @f'
+
+; CHECK: block-frequency-info: test
+; CHECK: do.body: float = 10.0,
+
+; The sum should still be ~10.
+;
+; CHECK-UR: block-frequency-info: test
+; CHECK-UR: - [[ENTRY:.*]]:
+; CHECK-UR: - [[DO_BODY:.*]]: float = 2.9078,
+; CHECK-UR: - [[DO_BODY_1:.*]]: float = 2.617,
+; CHECK-UR: - [[DO_BODY_2:.*]]: float = 2.3553,
+; CHECK-UR: - [[DO_BODY_3:.*]]: float = 2.1198,
+; CHECK-UR: - [[DO_END:.*]]:
+
+declare void @f(i32)
+
+define void @test(i32 %n) {
+; CHECK-UR-LABEL: define void @test(i32 %{{.*}}) {
+; CHECK-UR: [[ENTRY]]:
+; CHECK-UR: br label %[[DO_BODY]]
+; CHECK-UR: [[DO_BODY]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_1]], !prof ![[#PROF:]]
+; CHECK-UR: [[DO_BODY_1]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_2]], !prof ![[#PROF]]
+; CHECK-UR: [[DO_BODY_2]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_3]], !prof ![[#PROF]]
+; CHECK-UR: [[DO_BODY_3]]:
+; CHECK-UR: call void @f
+; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY]], !prof ![[#PROF]], !llvm.loop ![[#LOOP_UR_LATCH:]]
+; CHECK-UR: [[DO_END]]:
+; CHECK-UR: ret void
+
+entry:
+ br label %do.body
+
+do.body:
+ %i = phi i32 [ 0, %entry ], [ %inc, %do.body ]
+ %inc = add i32 %i, 1
+ call void @f(i32 %i)
+ %c = icmp sge i32 %inc, %n
+ br i1 %c, label %do.end, label %do.body, !prof !0
+
+do.end:
+ ret void
+}
+
+!0 = !{!"branch_weights", i32 1, i32 9}
+
+; CHECK-UR: ![[#PROF]] = !{!"branch_weights", i32 1, i32 9}
+; CHECK-UR: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; CHECK-UR: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3}
+; CHECK-UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"}
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
index 0c52b5a0..0473601 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
@@ -188,7 +188,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
; CHECK-NEXT: [[L_1_LCSSA_UNR:%.*]] = phi i32 [ poison, [[OUTER_HEADER]] ], [ [[L_1_LCSSA_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: [[INNER_1_IV_UNR:%.*]] = phi i64 [ [[X]], [[OUTER_HEADER]] ], [ [[INNER_1_IV_UNR_PH]], [[INNER_1_HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 7
-; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF3]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[OUTER_MIDDLE:%.*]], label [[OUTER_HEADER_NEW:%.*]], !prof [[PROF6:![0-9]+]]
; CHECK: outer.header.new:
; CHECK-NEXT: br label [[INNER_1_HEADER:%.*]]
; CHECK: inner.1.header:
@@ -232,7 +232,7 @@ define void @pr56286(i64 %x, ptr %src, ptr %dst, ptr %ptr.src) !prof !0 {
; CHECK-NEXT: store i32 [[L_1_7]], ptr [[DST]], align 8
; CHECK-NEXT: [[INNER_1_IV_NEXT_7]] = add i64 [[INNER_1_IV]], 8
; CHECK-NEXT: [[CMP_2_7:%.*]] = icmp sgt i64 [[INNER_1_IV_NEXT_6]], 0
-; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP_2_7]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[INNER_1_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: outer.middle.unr-lcssa:
; CHECK-NEXT: [[L_1_LCSSA_PH:%.*]] = phi i32 [ [[L_1_7]], [[INNER_1_LATCH_7]] ]
; CHECK-NEXT: br label [[OUTER_MIDDLE]]
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
index 2617199..2f8f98d 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll
@@ -2,12 +2,24 @@
;; Check that the remainder loop is properly assigned a branch weight for its latch branch.
; CHECK-LABEL: @test(
-; CHECK-LABEL: for.body:
-; CHECK: br i1 [[COND1:%.*]], label %for.end.loopexit.unr-lcssa, label %for.body, !prof ![[#PROF:]], !llvm.loop ![[#LOOP:]]
-; CHECK-LABEL: for.body.epil:
-; CHECK: br i1 [[COND2:%.*]], label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !prof ![[#PROF2:]], !llvm.loop ![[#LOOP2:]]
-; CHECK: ![[#PROF]] = !{!"branch_weights", i32 1, i32 2499}
-; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK-LABEL: entry:
+; CHECK: [[FOR_BODY_PREHEADER:.*]]:
+; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]], !prof ![[#PROF_UR_GUARD:]]
+; CHECK: [[FOR_BODY_PREHEADER_NEW]]:
+; CHECK: br label %for.body
+; CHECK: for.body:
+; CHECK: %add = add
+; CHECK: %add.1 = add
+; CHECK: %add.2 = add
+; CHECK: %add.3 = add
+; CHECK-NOT: %add.4 = add
+; CHECK: br i1 %{{.*}}, label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %for.body, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]]
+; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
+; CHECK: br i1 %{{.*}}, label %[[FOR_BODY_EPIL_PREHEADER]], label %[[FOR_END_LOOPEXIT:.*]], !prof ![[#PROF_RM_GUARD:]]
+; CHECK: [[FOR_BODY_EPIL_PREHEADER]]:
+; CHECK: br label %[[FOR_BODY_EPIL:.*]]
+; CHECK: [[FOR_BODY_EPIL]]:
+; CHECK: br i1 {{.*}}, label %[[FOR_BODY_EPIL]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]
define i3 @test(ptr %a, i3 %n) {
entry:
@@ -31,3 +43,37 @@ for.end:
}
!0 = !{!"branch_weights", i32 1, i32 9999}
+
+; Original loop probability: p = 9999/(1+9999) = 0.9999
+; Original estimated trip count: (1+9999)/1 = 10000
+; Unroll count: 4
+
+; Probability of >=3 iterations after first: p^3 = 0.9970003 =~
+; 2146839468 / (644180 + 2146839468).
+; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 644180, i32 2146839468}
+
+; Probability of >=4 more iterations: p^4 = 0.99960006 =~
+; 2146624784 / (858864 + 2146624784).
+; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 858864, i32 2146624784}
+
+; 10000//4 = 2500
+; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
+; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2500}
+;
+; CHECK: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"}
+
+; Probability of 1 to 3 more of 3 more remainder iterations:
+; (p-p^4)/(1-p^4) = 0.749962497 =~ 1610532724 / (1610532724 + 536950924).
+; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1610532724, i32 536950924}
+
+; Frequency of first remainder iter: r1 = 1
+; Frequency of second remainder iter: r2 = r1*(p-p^3)/(1-p^3) = 0.666633331
+; Frequency of third remainder iter: r3 = r2*(p-p^2)/(1-p^2) = 0.333299999
+; Solve for loop probability that produces that frequency: f = 1/(1-p') =>
+; p' = 1-1/f = 1-1/(r1+r2+r3) = 0.499983332 =~
+; 1073706403 / (1073706403 + 1073777245).
+; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1073706403, i32 1073777245}
+
+; 10000%4 = 0
+; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0}
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
index 492de06..ec7aba4 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -295,11 +295,12 @@ exit2.loopexit:
; COMMON-LABEL: {{^}}!0 =
; EPILOG: [[EPILOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11}
-; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 1, i32 127}
-; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1, i32 7}
-; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 3, i32 1}
+; EPILOG: [[EPILOG_PROF_1]] = !{!"branch_weights", i32 326124004, i32 1821359644}
+; EPILOG: [[EPILOG_PROF_2]] = !{!"branch_weights", i32 1856428066, i32 291055582}
+; EPILOG: [[EPILOG_PROF_3]] = !{!"branch_weights", i32 1597681585, i32 549802063}
-; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_LOOP_1:![0-9]+]]}
+; EPILOG: [[EPILOG_LOOP]] = distinct !{[[EPILOG_LOOP]], [[EPILOG_TC:![0-9]+]], [[EPILOG_LOOP_1:![0-9]+]]}
+; EPILOG: [[EPILOG_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3}
; EPILOG: [[EPILOG_LOOP_1]] = !{!"llvm.loop.unroll.disable"}
; PROLOG: [[PROLOG_PROF_0]] = !{!"branch_weights", i32 1, i32 11}
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
index 611ee5f..02f5bf9 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
@@ -3,14 +3,27 @@
@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
; CHECK-LABEL: @bar_prof
-; CHECK: loop:
-; CHECK: %mul = mul
-; CHECK: %mul.1 = mul
-; CHECK: %mul.2 = mul
-; CHECK: %mul.3 = mul
-; CHECK: br i1 %niter.ncmp.7, label %loop.end.unr-lcssa, label %loop, !prof [[PROF0:![0-9]+]]
-; CHECK: loop.epil:
-; CHECK: br i1 %epil.iter.cmp, label %loop.epil, label %loop.end.epilog-lcssa, !prof [[PROF1:![0-9]+]], !llvm.loop {{![0-9]+}}
+; CHECK: entry:
+; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]], !prof ![[#PROF_UR_GUARD:]]
+; CHECK: [[ENTRY_NEW]]:
+; CHECK: br label %loop
+; CHECK: loop:
+; CHECK: %mul = mul
+; CHECK: %mul.1 = mul
+; CHECK: %mul.2 = mul
+; CHECK: %mul.3 = mul
+; CHECK: %mul.4 = mul
+; CHECK: %mul.5 = mul
+; CHECK: %mul.6 = mul
+; CHECK: %mul.7 = mul
+; CHECK-NOT: %mul.8 = mul
+; CHECK: br i1 %{{.*}}, label %[[LOOP_END_UNR_LCSSA:.*]], label %loop, !prof ![[#PROF_UR_LATCH:]], !llvm.loop ![[#LOOP_UR_LATCH:]]
+; CHECK: [[LOOP_END_UNR_LCSSA]]:
+; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL_PREHEADER]], label %loop.end, !prof ![[#PROF_RM_GUARD:]]
+; CHECK: [[LOOP_EPIL_PREHEADER]]:
+; CHECK: br label %[[LOOP_EPIL:.*]]
+; CHECK: [[LOOP_EPIL]]:
+; CHECK: br i1 %{{.*}}, label %[[LOOP_EPIL]], label %[[LOOP_END_EPILOG_LCSSA:.*]], !prof ![[#PROF_RM_LATCH:]], !llvm.loop ![[#LOOP_RM_LATCH:]]
define i32 @bar_prof(ptr noalias nocapture readonly %src, i64 %c) !prof !1 {
entry:
br label %loop
@@ -60,5 +73,38 @@ loop.end:
!1 = !{!"function_entry_count", i64 1}
!2 = !{!"branch_weights", i32 1, i32 1000}
-; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 124}
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 1}
+; Original loop probability: p = 1000/(1+1000) = 0.999000999
+; Original estimated trip count: (1+1000)/1 = 1001
+; Unroll count: 8
+
+; Probability of >=7 iterations after first: p^7 = 0.993027916 =~
+; 2132511214 / (14972434 + 2132511214).
+; CHECK: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 14972434, i32 2132511214}
+
+; Probability of >=8 more iterations: p^8 = 0.99203588 =~
+; 2130380833 / (17102815 + 2130380833).
+; CHECK: ![[#PROF_UR_LATCH]] = !{!"branch_weights", i32 17102815, i32 2130380833}
+
+; 1001//8 = 125
+; CHECK: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]]}
+; CHECK: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 125}
+
+; Probability of 1 to 7 more of 7 more remainder iterations:
+; (p-p^8)/(1-p^8) = 0.874562282 =~ 1878108210 / (1878108210 + 269375438).
+; CHECK: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1878108210, i32 269375438}
+
+; Frequency of first remainder iter: r1 = 1
+; Frequency of second remainder iter: r2 = r1*(p-p^7)/(1-p^7) = 0.856714143
+; Frequency of third remainder iter: r3 = r2*(p-p^6)/(1-p^6) = 0.713571429
+; Frequency of fourth remainder iter: r4 = r2*(p-p^5)/(1-p^5) = 0.570571715
+; Frequency of fifth remainder iter: r5 = r2*(p-p^4)/(1-p^4) = 0.427714858
+; Frequency of sixth remainder iter: r6 = r2*(p-p^3)/(1-p^3) = 0.285000715
+; Frequency of seventh remainder iter: r7 = r2*(p-p^2)/(1-p^2) = 0.142429143
+; Solve for loop probability that produces that frequency: f = 1/(1-p') =>
+; p' = 1-1/f = 1-1/(r1+r2+r3+r4+r5+r6+r7) = 0.749749875 =~
+; 1610075606 / (1610075606 + 537408042).
+; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1610075606, i32 537408042}
+
+; Remainder estimated trip count: 1001%8 = 1
+; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]}
+; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1}