diff options
-rw-r--r-- | llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll | 105 |
1 files changed, 104 insertions, 1 deletions
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll index 0d3d242..653970c 100644 --- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll +++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='loop(indvars),instcombine' -S < %s | FileCheck %s +; RUN: opt -passes='loop(indvars),instcombine' -replexitval=always -S < %s | FileCheck %s ;; Test that loop's exit value is rewritten to its initial ;; value from loop preheader @@ -197,3 +197,106 @@ crit_edge: ret i16 %conv } +define i32 @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { +; CHECK-LABEL: @vscale_slt_with_vp_umin( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[VF:%.*]] = shl nuw nsw i32 [[VSCALE]], 2 +; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[EARLY_EXIT:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: early.exit: +; CHECK-NEXT: ret i32 0 +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[LEFT:%.*]] = sub nsw i32 [[N]], [[I_05]] +; CHECK-NEXT: [[VF_CAPPED:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[LEFT]]) +; CHECK-NEXT: store i32 [[VF_CAPPED]], ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_05]], [[VF]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]] +; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]]) +; CHECK-NEXT: ret i32 [[UMIN]] +; +entry: + %vscale = call i32 @llvm.vscale.i32() + %VF = shl nuw nsw i32 %vscale, 2 + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %early.exit + +early.exit: + ret i32 0 + +for.body: + %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 + %left = sub i32 %n, %i.05 + %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left) + store i32 %VF.capped, ptr %A + + %add = add nsw i32 %i.05, %VF + %cmp = icmp slt i32 %add, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret i32 %VF.capped +} + +define i32 @vscale_slt_with_vp_umin2(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { +; CHECK-LABEL: @vscale_slt_with_vp_umin2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[VF:%.*]] = shl nuw nsw i32 [[VSCALE]], 2 +; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[VF]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[EARLY_EXIT:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: early.exit: +; CHECK-NEXT: ret i32 0 +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[LEFT:%.*]] = sub i32 [[N]], [[I_05]] +; CHECK-NEXT: [[VF_CAPPED:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[LEFT]]) +; CHECK-NEXT: store i32 [[VF_CAPPED]], ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_05]], [[VF]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]] +; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]]) +; CHECK-NEXT: ret i32 [[UMIN]] +; +entry: + %vscale = call i32 @llvm.vscale.i32() + %VF = shl nuw nsw i32 %vscale, 2 + %cmp4 = icmp sgt i32 %n, %VF + br i1 %cmp4, label %for.body, label %early.exit + +early.exit: + ret i32 0 + +for.body: + %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 + %left = sub i32 %n, %i.05 + %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left) + store i32 %VF.capped, ptr %A + + %add = add nsw i32 %i.05, %VF + %cmp = icmp slt i32 %add, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret i32 %VF.capped +} |