; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-unroll -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s

; Integer add reduction with a constant trip count of 1000, partially unrolled by 4.
define i32 @test_add(ptr %src, i64 %n, i32 %start) {
; CHECK-LABEL: define i32 @test_add(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
; CHECK-NEXT:    [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]]
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
; CHECK-NEXT:    [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_24]]
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; CHECK-NEXT:    ret i32 [[RDX_NEXT_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  %gep.src = getelementptr i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src, align 1
  %rdx.next = add i32 %rdx, %l
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret i32 %rdx.next
}

; Trip count 1001 is not a multiple of the unroll count 4, so the exit check is
; kept after the first unrolled iteration.
define i32 @test_add_tc_not_multiple_of_4(ptr %src, i64 %n, i32 %start) {
; CHECK-LABEL: define i32 @test_add_tc_not_multiple_of_4(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP_1:.*]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
; CHECK-NEXT:    [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 1001
; CHECK-NEXT:    br i1 [[EC]], label %[[LOOP_1]], label %[[EXIT:.*]]
; CHECK:       [[LOOP_1]]:
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
; CHECK-NEXT:    [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]]
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_12:%.*]] = load i32, ptr [[GEP_SRC_12]], align 1
; CHECK-NEXT:    [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_12]]
; CHECK-NEXT:    br label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ]
; CHECK-NEXT:    ret i32 [[RDX_NEXT_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  %gep.src = getelementptr i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src, align 1
  %rdx.next = add i32 %rdx, %l
  %ec = icmp ne i64 %iv.next, 1001
  br i1 %ec, label %loop, label %exit

exit:
  ret i32 %rdx.next
}

; The updated reduction value is also stored to memory in every iteration.
define i32 @test_add_rdx_used_in_loop(ptr %src, i64 %n, i32 %start) {
; CHECK-LABEL: define i32 @test_add_rdx_used_in_loop(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_24:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
; CHECK-NEXT:    store i32 [[RDX_NEXT]], ptr [[GEP_SRC]], align 4
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
; CHECK-NEXT:    [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
; CHECK-NEXT:    store i32 [[RDX_NEXT_1]], ptr [[GEP_SRC_1]], align 4
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]]
; CHECK-NEXT:    store i32 [[RDX_NEXT_2]], ptr [[GEP_SRC_2]], align 4
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
; CHECK-NEXT:    [[RDX_NEXT_24]] = add i32 [[RDX_NEXT_2]], [[L_24]]
; CHECK-NEXT:    store i32 [[RDX_NEXT_24]], ptr [[GEP_SRC_24]], align 4
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_24]], %[[LOOP]] ]
; CHECK-NEXT:    ret i32 [[RDX_NEXT_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  %gep.src = getelementptr i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src, align 1
  %rdx.next = add i32 %rdx, %l
  store i32 %rdx.next, ptr %gep.src
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret i32 %rdx.next
}

; The reduction phi itself (not the updated value) is returned after the loop.
define i32 @test_add_phi_used_outside_loop(ptr %src, i64 %n, i32 %start) {
; CHECK-LABEL: define i32 @test_add_phi_used_outside_loop(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
; CHECK-NEXT:    [[RDX_NEXT_1:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = add i32 [[RDX_NEXT_1]], [[L_2]]
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
; CHECK-NEXT:    [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_24]]
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_2]], %[[LOOP]] ]
; CHECK-NEXT:    ret i32 [[RDX_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  %gep.src = getelementptr i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src, align 1
  %rdx.next = add i32 %rdx, %l
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret i32 %rdx
}

; Two reductions (add and mul) fed by the same loaded value.
define i32 @test_add_and_mul_reduction(ptr %src, i64 %n, i32 %start) {
; CHECK-LABEL: define i32 @test_add_and_mul_reduction(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RDX_1:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_1_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RDX_2:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_2_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
; CHECK-NEXT:    [[RDX_1_NEXT:%.*]] = add i32 [[RDX_1]], [[L]]
; CHECK-NEXT:    [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[L]]
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
; CHECK-NEXT:    [[RDX_1_2:%.*]] = add i32 [[RDX_1_NEXT]], [[L_1]]
; CHECK-NEXT:    [[RDX_2_2:%.*]] = mul i32 [[RDX_2_NEXT]], [[L_1]]
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
; CHECK-NEXT:    [[RDX_1_NEXT_2:%.*]] = add i32 [[RDX_1_2]], [[L_2]]
; CHECK-NEXT:    [[RDX_2_NEXT_2:%.*]] = mul i32 [[RDX_2_2]], [[L_2]]
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
; CHECK-NEXT:    [[RDX_1_NEXT_3]] = add i32 [[RDX_1_NEXT_2]], [[L_24]]
; CHECK-NEXT:    [[RDX_2_NEXT_3]] = mul i32 [[RDX_2_NEXT_2]], [[L_24]]
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RDX_1_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_1_NEXT_3]], %[[LOOP]] ]
; CHECK-NEXT:    [[BIN_RDX5:%.*]] = phi i32 [ [[RDX_2_NEXT_3]], %[[LOOP]] ]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[RDX_1_NEXT_LCSSA]], [[BIN_RDX5]]
; CHECK-NEXT:    ret i32 [[RES]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %rdx.1 = phi i32 [ %start, %entry ], [ %rdx.1.next, %loop ]
  %rdx.2 = phi i32 [ %start, %entry ], [ %rdx.2.next, %loop ]
  %iv.next = add i64 %iv, 1
  %gep.src = getelementptr i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src, align 1
  %rdx.1.next = add i32 %rdx.1, %l
  %rdx.2.next = mul i32 %rdx.2, %l
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  %res = add i32 %rdx.1.next, %rdx.2.next
  ret i32 %res
}

; Floating-point add reduction without any fast-math flags.
define float @test_fadd_no_fmfs(ptr %src, i64 %n, float %start) {
; CHECK-LABEL: define float @test_fadd_no_fmfs(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], float [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = fadd float [[RDX]], [[L]]
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1
; CHECK-NEXT:    [[RDX_NEXT_1:%.*]] = fadd float [[RDX_NEXT]], [[L_1]]
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = fadd float [[RDX_NEXT_1]], [[L_2]]
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1
; CHECK-NEXT:    [[RDX_NEXT_3]] = fadd float [[RDX_NEXT_2]], [[L_24]]
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RDX_NEXT_LCSSA:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ]
; CHECK-NEXT:    ret float [[RDX_NEXT_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %rdx = phi float [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  %gep.src = getelementptr float, ptr %src, i64 %iv
  %l = load float, ptr %gep.src, align 1
  %rdx.next = fadd float %rdx, %l
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret float %rdx.next
}

; Floating-point add reduction whose fadd carries the reassoc fast-math flag.
; Without the flag this test would be identical to @test_fadd_no_fmfs.
define float @test_fadd_with_ressaoc(ptr %src, i64 %n, float %start) {
; CHECK-LABEL: define float @test_fadd_with_ressaoc(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], float [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi float [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load float, ptr [[GEP_SRC]], align 1
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = fadd reassoc float [[RDX]], [[L]]
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 1
; CHECK-NEXT:    [[RDX_NEXT_1:%.*]] = fadd reassoc float [[RDX_NEXT]], [[L_1]]
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 1
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = fadd reassoc float [[RDX_NEXT_1]], [[L_2]]
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_SRC_24:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_24:%.*]] = load float, ptr [[GEP_SRC_24]], align 1
; CHECK-NEXT:    [[RDX_NEXT_3]] = fadd reassoc float [[RDX_NEXT_2]], [[L_24]]
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RDX_NEXT_LCSSA:%.*]] = phi float [ [[RDX_NEXT_3]], %[[LOOP]] ]
; CHECK-NEXT:    ret float [[RDX_NEXT_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %rdx = phi float [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  %gep.src = getelementptr float, ptr %src, i64 %iv
  %l = load float, ptr %gep.src, align 1
  %rdx.next = fadd reassoc float %rdx, %l
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret float %rdx.next
}

; Signed-minimum reduction expressed with the llvm.smin intrinsic.
define i32 @test_smin(ptr %src, i64 %n) {
; CHECK-LABEL: define i32 @test_smin(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[MIN:%.*]] = phi i32 [ 1000, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = call i32 @llvm.smin.i32(i32 [[MIN]], i32 [[L]])
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
; CHECK-NEXT:    [[RDX_NEXT_1:%.*]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT]], i32 [[L_1]])
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT_1]], i32 [[L_2]])
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
; CHECK-NEXT:    [[RDX_NEXT_3]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT_2]], i32 [[L_24]])
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; CHECK-NEXT:    ret i32 [[RDX_NEXT_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %min = phi i32 [ 1000, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  %gep.src = getelementptr i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src, align 1
  %rdx.next = call i32 @llvm.smin(i32 %min, i32 %l)
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret i32 %rdx.next
}

; Select-based reduction: each iteration either keeps the accumulator or
; replaces it with 0, depending on whether the loaded byte is zero.
define i64 @test_any_of_reduction(ptr %src, i64 %n) {
; CHECK-LABEL: define i64 @test_any_of_reduction(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[ANY_OF_RDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[L]], 0
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = select i1 [[C]], i64 [[ANY_OF_RDX]], i64 0
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i8 [[L_1]], 0
; CHECK-NEXT:    [[RDX_NEXT_1:%.*]] = select i1 [[C_1]], i64 [[RDX_NEXT]], i64 0
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load i8, ptr [[GEP_SRC_2]], align 1
; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i8 [[L_2]], 0
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = select i1 [[C_2]], i64 [[RDX_NEXT_1]], i64 0
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_SRC_24:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_24:%.*]] = load i8, ptr [[GEP_SRC_24]], align 1
; CHECK-NEXT:    [[C_24:%.*]] = icmp eq i8 [[L_24]], 0
; CHECK-NEXT:    [[RDX_NEXT_3]] = select i1 [[C_24]], i64 [[RDX_NEXT_2]], i64 0
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[RDX_NEXT_LCSSA:%.*]] = phi i64 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; CHECK-NEXT:    ret i64 [[RDX_NEXT_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %any.of.rdx = phi i64 [ %rdx.next, %loop ], [ 0, %entry ]
  %iv.next = add i64 %iv, 1
  %gep.src = getelementptr i8, ptr %src, i64 %iv
  %l = load i8, ptr %gep.src, align 1
  %c = icmp eq i8 %l, 0
  %rdx.next = select i1 %c, i64 %any.of.rdx, i64 0
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret i64 %rdx.next
}

; Add reduction whose running value is stored to %sum in every iteration; the
; initial value is loaded from %sum before the loop.
define void @reduction_with_intermediate_store(ptr %src, ptr %sum) {
; CHECK-LABEL: define void @reduction_with_intermediate_store(
; CHECK-SAME: ptr [[SRC:%.*]], ptr [[SUM:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    [[SUM_PROMOTED:%.*]] = load i32, ptr [[SUM]], align 4
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RED:%.*]] = phi i32 [ [[SUM_PROMOTED]], %[[ENTRY]] ], [ [[RED_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[GEP_SRC:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; CHECK-NEXT:    [[RED_NEXT:%.*]] = add nsw i32 [[RED]], [[L]]
; CHECK-NEXT:    store i32 [[RED_NEXT]], ptr [[SUM]], align 4
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
; CHECK-NEXT:    [[RED_NEXT_1:%.*]] = add nsw i32 [[RED_NEXT]], [[L_1]]
; CHECK-NEXT:    store i32 [[RED_NEXT_1]], ptr [[SUM]], align 4
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; CHECK-NEXT:    [[RED_NEXT_2:%.*]] = add nsw i32 [[RED_NEXT_1]], [[L_2]]
; CHECK-NEXT:    store i32 [[RED_NEXT_2]], ptr [[SUM]], align 4
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_3:%.*]] = load i32, ptr [[GEP_SRC_3]], align 4
; CHECK-NEXT:    [[RED_NEXT_3]] = add nsw i32 [[RED_NEXT_2]], [[L_3]]
; CHECK-NEXT:    store i32 [[RED_NEXT_3]], ptr [[SUM]], align 4
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 10000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  %sum.promoted = load i32, ptr %sum, align 4
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %red = phi i32 [ %sum.promoted, %entry ], [ %red.next, %loop ]
  %gep.src = getelementptr inbounds nuw i32, ptr %src, i64 %iv
  %l = load i32, ptr %gep.src, align 4
  %red.next = add nsw i32 %red, %l
  store i32 %red.next, ptr %sum, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 10000
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

declare i32 @foo()

; Loop with a call cannot be handled by LoopVectorize, introducing additional
; accumulators when unrolling increases throughput.
define i32 @test_add_with_call(i64 %n, i32 %start) {
; CHECK-LABEL: define i32 @test_add_with_call(
; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[L:%.*]] = call i32 @foo()
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
; CHECK-NEXT:    [[L_1:%.*]] = call i32 @foo()
; CHECK-NEXT:    [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
; CHECK-NEXT:    [[L_2:%.*]] = call i32 @foo()
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]]
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[L_3:%.*]] = call i32 @foo()
; CHECK-NEXT:    [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]]
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; CHECK-NEXT:    ret i32 [[BIN_RDX2]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  ; The value added to the reduction comes from a call rather than a load.
  %l = call i32 @foo()
  %rdx.next = add i32 %rdx, %l
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret i32 %rdx.next
}

; Loop with backward dependence cannot be handled by LoopVectorize, introducing
; additional accumulators when unrolling increases throughput.
define i32 @test_add_with_backward_dep(ptr %p, i64 %n, i32 %start) {
; CHECK-LABEL: define i32 @test_add_with_backward_dep(
; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
; CHECK-NEXT:    store i32 0, ptr [[GEP_1]], align 4
; CHECK-NEXT:    [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]]
; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; CHECK-NEXT:    [[GEP_11:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
; CHECK-NEXT:    [[L_1:%.*]] = load i32, ptr [[GEP_11]], align 4
; CHECK-NEXT:    [[GEP_1_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    store i32 0, ptr [[GEP_1_1]], align 4
; CHECK-NEXT:    [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]]
; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
; CHECK-NEXT:    [[L_2:%.*]] = load i32, ptr [[GEP_2]], align 4
; CHECK-NEXT:    [[GEP_1_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    store i32 0, ptr [[GEP_1_2]], align 4
; CHECK-NEXT:    [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]]
; CHECK-NEXT:    [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
; CHECK-NEXT:    [[L_3:%.*]] = load i32, ptr [[GEP_3]], align 4
; CHECK-NEXT:    [[GEP_1_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_3]]
; CHECK-NEXT:    store i32 0, ptr [[GEP_1_3]], align 4
; CHECK-NEXT:    [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]]
; CHECK-NEXT:    [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
; CHECK-NEXT:    br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[BIN_RDX3:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; CHECK-NEXT:    ret i32 [[BIN_RDX3]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  %gep = getelementptr inbounds nuw i32, ptr %p, i64 %iv
  %l = load i32, ptr %gep
  ; Store to p[iv + 1], the element the next iteration loads from.
  %gep.1 = getelementptr inbounds nuw i32, ptr %p, i64 %iv.next
  store i32 0, ptr %gep.1
  %rdx.next = add i32 %rdx, %l
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret i32 %rdx.next
}